// See https://raw.githubusercontent.com/duckdb/duckdb/master/LICENSE for licensing information //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_search_path.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { class ClientContext; struct CatalogSearchEntry { CatalogSearchEntry(string catalog, string schema); string catalog; string schema; public: string ToString() const; static string ListToString(const vector &input); static CatalogSearchEntry Parse(const string &input); static vector ParseList(const string &input); private: static CatalogSearchEntry ParseInternal(const string &input, idx_t &pos); static string WriteOptionallyQuoted(const string &input); }; enum class CatalogSetPathType { SET_SCHEMA, SET_SCHEMAS }; //! The schema search path, in order by which entries are searched if no schema entry is provided class CatalogSearchPath { public: DUCKDB_API explicit CatalogSearchPath(ClientContext &client_p); CatalogSearchPath(const CatalogSearchPath &other) = delete; DUCKDB_API void Set(CatalogSearchEntry new_value, CatalogSetPathType set_type); DUCKDB_API void Set(vector new_paths, CatalogSetPathType set_type); DUCKDB_API void Reset(); DUCKDB_API const vector &Get(); const vector &GetSetPaths() { return set_paths; } DUCKDB_API const CatalogSearchEntry &GetDefault(); DUCKDB_API string GetDefaultSchema(const string &catalog); DUCKDB_API string GetDefaultCatalog(const string &schema); DUCKDB_API vector GetSchemasForCatalog(const string &catalog); DUCKDB_API vector GetCatalogsForSchema(const string &schema); DUCKDB_API bool SchemaInSearchPath(ClientContext &context, const string &catalog_name, const string &schema_name); private: void SetPaths(vector new_paths); string GetSetName(CatalogSetPathType set_type); private: ClientContext &context; vector paths; //! 
Only the paths that were explicitly set (minus the always included paths) vector set_paths; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/function_entry.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! An aggregate function in the catalog class FunctionEntry : public StandardEntry { public: FunctionEntry(CatalogType type, Catalog &catalog, SchemaCatalogEntry &schema, CreateFunctionInfo &info) : StandardEntry(type, schema, catalog, info.name) { description = std::move(info.description); parameter_names = std::move(info.parameter_names); example = std::move(info.example); } //! The description (if any) string description; //! Parameter names (if any) vector parameter_names; //! The example (if any) string example; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/create_aggregate_function_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct CreateAggregateFunctionInfo : public CreateFunctionInfo { explicit CreateAggregateFunctionInfo(AggregateFunction function); explicit CreateAggregateFunctionInfo(AggregateFunctionSet set); AggregateFunctionSet functions; public: unique_ptr Copy() const override; }; } // namespace duckdb namespace duckdb { //! 
An aggregate function in the catalog class AggregateFunctionCatalogEntry : public FunctionEntry { public: static constexpr const CatalogType Type = CatalogType::AGGREGATE_FUNCTION_ENTRY; static constexpr const char *Name = "aggregate function"; public: AggregateFunctionCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateAggregateFunctionInfo &info) : FunctionEntry(CatalogType::AGGREGATE_FUNCTION_ENTRY, catalog, schema, info), functions(info.functions) { } //! The aggregate functions AggregateFunctionSet functions; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/collate_catalog_entry.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/create_collation_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct CreateCollationInfo : public CreateInfo { DUCKDB_API CreateCollationInfo(string name_p, ScalarFunction function_p, bool combinable_p, bool not_required_for_equality_p); //! The name of the collation string name; //! The collation function to push in case collation is required ScalarFunction function; //! Whether or not the collation can be combined with other collations. bool combinable; //! Whether or not the collation is required for equality comparisons or not. For many collations a binary //! comparison for equality comparisons is correct, allowing us to skip the collation in these cases which greatly //! speeds up processing. bool not_required_for_equality; protected: void SerializeInternal(Serializer &) const override; public: unique_ptr Copy() const override; }; } // namespace duckdb namespace duckdb { //! 
//! A collation catalog entry
class CollateCatalogEntry : public StandardEntry {
public:
    static constexpr const CatalogType Type = CatalogType::COLLATION_ENTRY;
    static constexpr const char *Name = "collation";

public:
    //! Copies the function and both flags out of `info`
    CollateCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateCollationInfo &info)
        : StandardEntry(CatalogType::COLLATION_ENTRY, schema, catalog, info.name), function(info.function),
          combinable(info.combinable), not_required_for_equality(info.not_required_for_equality) {
    }

    //! The collation function to push in case collation is required
    ScalarFunction function;
    //! Whether or not the collation can be combined with other collations.
    bool combinable;
    //! Whether the collation can be skipped for equality comparisons. For many collations a binary
    //! comparison for equality comparisons is correct, allowing us to skip the collation in these cases which greatly
    //! speeds up processing.
    bool not_required_for_equality;
};
} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/catalog/catalog_entry/copy_function_catalog_entry.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

class Catalog;
struct CreateCopyFunctionInfo;

//! A copy function in the catalog
class CopyFunctionCatalogEntry : public StandardEntry {
public:
    static constexpr const CatalogType Type = CatalogType::COPY_FUNCTION_ENTRY;
    static constexpr const char *Name = "copy function";

public:
    CopyFunctionCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateCopyFunctionInfo &info);

    //! The copy function
    CopyFunction function;
};
} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/catalog/catalog_entry/index_catalog_entry.hpp
//
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/parser/parsed_data/create_index_info.hpp
//
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/common/enums/index_type.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//===--------------------------------------------------------------------===//
// Index Types
//===--------------------------------------------------------------------===//
enum class IndexType : uint8_t {
    INVALID = 0, // invalid index type
    ART = 1      // Adaptive Radix Tree
};

//===--------------------------------------------------------------------===//
// Index Constraint Types
//===--------------------------------------------------------------------===//
// NOTE: this is an unscoped enum (no `class`), so the enumerators are also visible
// directly in namespace duckdb and convert implicitly to integers.
enum IndexConstraintType : uint8_t {
    NONE = 0,    // index is not built to enforce any constraint
    UNIQUE = 1,  // index is built to enforce a UNIQUE constraint
    PRIMARY = 2, // index is built to enforce a PRIMARY KEY constraint
    FOREIGN = 3  // index is built to enforce a FOREIGN KEY constraint
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/parser/tableref/basetableref.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
Represents a TableReference to a base table in the schema class BaseTableRef : public TableRef { public: static constexpr const TableReferenceType TYPE = TableReferenceType::BASE_TABLE; public: BaseTableRef() : TableRef(TableReferenceType::BASE_TABLE), catalog_name(INVALID_CATALOG), schema_name(INVALID_SCHEMA) { } //! The catalog name string catalog_name; //! Schema name string schema_name; //! Table name string table_name; //! Aliases for the column names vector column_name_alias; public: string ToString() const override; bool Equals(const TableRef &other_p) const override; unique_ptr Copy() override; //! Serializes a blob into a BaseTableRef void Serialize(FieldWriter &serializer) const override; //! Deserializes a blob back into a BaseTableRef static unique_ptr Deserialize(FieldReader &source); void FormatSerialize(FormatSerializer &serializer) const override; static unique_ptr FormatDeserialize(FormatDeserializer &source); }; } // namespace duckdb namespace duckdb { struct CreateIndexInfo : public CreateInfo { CreateIndexInfo() : CreateInfo(CatalogType::INDEX_ENTRY) { } //! Index Type (e.g., B+-tree, Skip-List, ...) IndexType index_type; //! Name of the Index string index_name; //! Index Constraint Type IndexConstraintType constraint_type; //! The table to create the index on unique_ptr table; //! Set of expressions to index by vector> expressions; vector> parsed_expressions; //! Types used for the CREATE INDEX scan vector scan_types; //! The names of the columns, used for the CREATE INDEX scan vector names; //! 
Column IDs needed for index creation vector column_ids; protected: void SerializeInternal(Serializer &serializer) const override; public: DUCKDB_API unique_ptr Copy() const override; static unique_ptr Deserialize(Deserializer &deserializer); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/meta_block_writer.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/block.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Block : public FileBuffer { public: Block(Allocator &allocator, block_id_t id); Block(Allocator &allocator, block_id_t id, uint32_t internal_size); Block(FileBuffer &source, block_id_t id); block_id_t id; }; struct BlockPointer { BlockPointer(block_id_t block_id_p, uint32_t offset_p) : block_id(block_id_p), offset(offset_p) {}; BlockPointer() {}; block_id_t block_id {0}; uint32_t offset {0}; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/block_manager.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class BlockHandle; class BufferManager; class ClientContext; class DatabaseInstance; //! BlockManager is an abstract representation to manage blocks on DuckDB. When writing or reading blocks, the //! BlockManager creates and accesses blocks. The concrete types implements how blocks are stored. class BlockManager { public: explicit BlockManager(BufferManager &buffer_manager) : buffer_manager(buffer_manager) { } virtual ~BlockManager() = default; //! The buffer manager BufferManager &buffer_manager; public: //! 
Creates a new block inside the block manager virtual unique_ptr ConvertBlock(block_id_t block_id, FileBuffer &source_buffer) = 0; virtual unique_ptr CreateBlock(block_id_t block_id, FileBuffer *source_buffer) = 0; //! Return the next free block id virtual block_id_t GetFreeBlockId() = 0; //! Returns whether or not a specified block is the root block virtual bool IsRootBlock(block_id_t root) = 0; //! Mark a block as "free"; free blocks are immediately added to the free list and can be immediately overwritten virtual void MarkBlockAsFree(block_id_t block_id) = 0; //! Mark a block as "modified"; modified blocks are added to the free list after a checkpoint (i.e. their data is //! assumed to be rewritten) virtual void MarkBlockAsModified(block_id_t block_id) = 0; //! Increase the reference count of a block. The block should hold at least one reference before this method is //! called. virtual void IncreaseBlockReferenceCount(block_id_t block_id) = 0; //! Get the first meta block id virtual block_id_t GetMetaBlock() = 0; //! Read the content of the block from disk virtual void Read(Block &block) = 0; //! Writes the block to disk virtual void Write(FileBuffer &block, block_id_t block_id) = 0; //! Writes the block to disk void Write(Block &block) { Write(block, block.id); } //! Write the header; should be the final step of a checkpoint virtual void WriteHeader(DatabaseHeader header) = 0; //! Returns the number of total blocks virtual idx_t TotalBlocks() = 0; //! Returns the number of free blocks virtual idx_t FreeBlocks() = 0; //! Register a block with the given block id in the base file shared_ptr RegisterBlock(block_id_t block_id, bool is_meta_block = false); //! Clear cached handles for meta blocks void ClearMetaBlockHandles(); //! 
Convert an existing in-memory buffer into a persistent disk-backed block shared_ptr ConvertToPersistent(block_id_t block_id, shared_ptr old_block); void UnregisterBlock(block_id_t block_id, bool can_destroy); static BlockManager &GetBlockManager(ClientContext &context); static BlockManager &GetBlockManager(DatabaseInstance &db); private: //! The lock for the set of blocks mutex blocks_lock; //! A mapping of block id -> BlockHandle unordered_map> blocks; //! A map to cache the BlockHandles of meta blocks unordered_map> meta_blocks; }; } // namespace duckdb namespace duckdb { class DatabaseInstance; //! This struct is responsible for writing data to disk in a stream of blocks. class MetaBlockWriter : public Serializer { public: MetaBlockWriter(BlockManager &block_manager, block_id_t initial_block_id = INVALID_BLOCK); ~MetaBlockWriter() override; BlockManager &block_manager; protected: unique_ptr block; set written_blocks; idx_t offset; public: BlockPointer GetBlockPointer(); virtual void Flush(); void WriteData(const_data_ptr_t buffer, idx_t write_size) override; void MarkWrittenBlocks() { for (auto &block_id : written_blocks) { block_manager.MarkBlockAsModified(block_id); } } protected: virtual block_id_t GetNextBlockId(); void AdvanceBlock(); }; } // namespace duckdb namespace duckdb { struct DataTableInfo; class Index; //! An index catalog entry class IndexCatalogEntry : public StandardEntry { public: static constexpr const CatalogType Type = CatalogType::INDEX_ENTRY; static constexpr const char *Name = "index"; public: //! 
Create an IndexCatalogEntry and initialize storage for it IndexCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateIndexInfo &info); optional_ptr index; string sql; vector> expressions; vector> parsed_expressions; public: string ToSQL() const override; void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &source, ClientContext &context); virtual string GetSchemaName() const = 0; virtual string GetTableName() const = 0; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/macro_catalog_entry.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/create_macro_info.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/macro_function.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/expression/constant_expression.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! ConstantExpression represents a constant value in the query class ConstantExpression : public ParsedExpression { public: static constexpr const ExpressionClass TYPE = ExpressionClass::CONSTANT; public: DUCKDB_API explicit ConstantExpression(Value val); //! 
The constant value referenced Value value; public: string ToString() const override; static bool Equal(const ConstantExpression &a, const ConstantExpression &b); hash_t Hash() const override; unique_ptr Copy() const override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(ExpressionType type, FieldReader &source); void FormatSerialize(FormatSerializer &serializer) const override; static unique_ptr FormatDeserialize(ExpressionType type, FormatDeserializer &deserializer); }; } // namespace duckdb namespace duckdb { enum class MacroType : uint8_t { VOID_MACRO = 0, TABLE_MACRO = 1, SCALAR_MACRO = 2 }; class MacroFunction { public: explicit MacroFunction(MacroType type); //! The type MacroType type; //! The positional parameters vector> parameters; //! The default parameters and their associated values unordered_map> default_parameters; public: virtual ~MacroFunction() { } void CopyProperties(MacroFunction &other) const; virtual unique_ptr Copy() const = 0; static string ValidateArguments(MacroFunction ¯o_function, const string &name, FunctionExpression &function_expr, vector> &positionals, unordered_map> &defaults); virtual string ToSQL(const string &schema, const string &name) const; void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); protected: virtual void SerializeInternal(FieldWriter &writer) const = 0; public: template TARGET &Cast() { if (type != TARGET::TYPE) { throw InternalException("Failed to cast macro to type - macro type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (type != TARGET::TYPE) { throw InternalException("Failed to cast macro to type - macro type mismatch"); } return reinterpret_cast(*this); } }; } // namespace duckdb namespace duckdb { struct CreateMacroInfo : public CreateFunctionInfo { CreateMacroInfo(); CreateMacroInfo(CatalogType type); unique_ptr function; public: unique_ptr Copy() const override; DUCKDB_API 
static unique_ptr Deserialize(Deserializer &deserializer); protected: void SerializeInternal(Serializer &) const override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/macro_catalog_entry.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! A macro function in the catalog class MacroCatalogEntry : public FunctionEntry { public: MacroCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateMacroInfo &info); //! The macro function unique_ptr function; public: virtual unique_ptr GetInfoForSerialization() const; //! Serialize the meta information virtual void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &main_source, ClientContext &context); string ToSQL() const override { return function->ToSQL(schema.name, name); } }; } // namespace duckdb namespace duckdb { //! A macro function in the catalog class ScalarMacroCatalogEntry : public MacroCatalogEntry { public: static constexpr const CatalogType Type = CatalogType::MACRO_ENTRY; static constexpr const char *Name = "macro function"; public: ScalarMacroCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateMacroInfo &info); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Catalog; struct CreatePragmaFunctionInfo; //! A table function in the catalog class PragmaFunctionCatalogEntry : public FunctionEntry { public: static constexpr const CatalogType Type = CatalogType::PRAGMA_FUNCTION_ENTRY; static constexpr const char *Name = "pragma function"; public: PragmaFunctionCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreatePragmaFunctionInfo &info); //! 
The pragma functions PragmaFunctionSet functions; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/create_scalar_function_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct CreateScalarFunctionInfo : public CreateFunctionInfo { DUCKDB_API explicit CreateScalarFunctionInfo(ScalarFunction function); DUCKDB_API explicit CreateScalarFunctionInfo(ScalarFunctionSet set); ScalarFunctionSet functions; public: DUCKDB_API unique_ptr Copy() const override; DUCKDB_API unique_ptr GetAlterInfo() const override; }; } // namespace duckdb namespace duckdb { //! A table function in the catalog class ScalarFunctionCatalogEntry : public FunctionEntry { public: static constexpr const CatalogType Type = CatalogType::SCALAR_FUNCTION_ENTRY; static constexpr const char *Name = "scalar function"; public: ScalarFunctionCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateScalarFunctionInfo &info); //! The scalar functions ScalarFunctionSet functions; public: unique_ptr AlterEntry(ClientContext &context, AlterInfo &info) override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/table_catalog_entry.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/column_list.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! 
A set of column definitions class ColumnList { public: class ColumnListIterator; public: DUCKDB_API ColumnList(bool allow_duplicate_names = false); DUCKDB_API void AddColumn(ColumnDefinition column); void Finalize(); DUCKDB_API const ColumnDefinition &GetColumn(LogicalIndex index) const; DUCKDB_API const ColumnDefinition &GetColumn(PhysicalIndex index) const; DUCKDB_API const ColumnDefinition &GetColumn(const string &name) const; DUCKDB_API ColumnDefinition &GetColumnMutable(LogicalIndex index); DUCKDB_API ColumnDefinition &GetColumnMutable(PhysicalIndex index); DUCKDB_API ColumnDefinition &GetColumnMutable(const string &name); DUCKDB_API vector GetColumnNames() const; DUCKDB_API vector GetColumnTypes() const; DUCKDB_API bool ColumnExists(const string &name) const; DUCKDB_API LogicalIndex GetColumnIndex(string &column_name) const; DUCKDB_API PhysicalIndex LogicalToPhysical(LogicalIndex index) const; DUCKDB_API LogicalIndex PhysicalToLogical(PhysicalIndex index) const; idx_t LogicalColumnCount() const { return columns.size(); } idx_t PhysicalColumnCount() const { return physical_columns.size(); } bool empty() const { return columns.empty(); } ColumnList Copy() const; void Serialize(FieldWriter &writer) const; static ColumnList Deserialize(FieldReader &reader); DUCKDB_API ColumnListIterator Logical() const; DUCKDB_API ColumnListIterator Physical() const; void SetAllowDuplicates(bool allow_duplicates) { allow_duplicate_names = allow_duplicates; } private: vector columns; //! A map of column name to column index case_insensitive_map_t name_map; //! The set of physical columns vector physical_columns; //! 
Allow duplicate names or not bool allow_duplicate_names; private: void AddToNameMap(ColumnDefinition &column); public: // logical iterator class ColumnListIterator { public: ColumnListIterator(const ColumnList &list, bool physical) : list(list), physical(physical) { } private: const ColumnList &list; bool physical; private: class ColumnLogicalIteratorInternal { public: ColumnLogicalIteratorInternal(const ColumnList &list, bool physical, idx_t pos, idx_t end) : list(list), physical(physical), pos(pos), end(end) { } const ColumnList &list; bool physical; idx_t pos; idx_t end; public: ColumnLogicalIteratorInternal &operator++() { pos++; return *this; } bool operator!=(const ColumnLogicalIteratorInternal &other) const { return pos != other.pos || end != other.end || &list != &other.list; } const ColumnDefinition &operator*() const { if (physical) { return list.GetColumn(PhysicalIndex(pos)); } else { return list.GetColumn(LogicalIndex(pos)); } } }; public: idx_t Size() { return physical ? list.PhysicalColumnCount() : list.LogicalColumnCount(); } ColumnLogicalIteratorInternal begin() { return ColumnLogicalIteratorInternal(list, physical, 0, Size()); } ColumnLogicalIteratorInternal end() { return ColumnLogicalIteratorInternal(list, physical, Size(), Size()); } }; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/bound_constraint.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! 
Bound equivalent of Constraint class BoundConstraint { public: explicit BoundConstraint(ConstraintType type) : type(type) {}; virtual ~BoundConstraint() { } void Serialize(Serializer &serializer) const { serializer.Write(type); } static unique_ptr Deserialize(Deserializer &source) { return make_uniq(source.Read()); } ConstraintType type; public: template TARGET &Cast() { if (type != TARGET::TYPE) { throw InternalException("Failed to cast constraint to type - bound constraint type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (type != TARGET::TYPE) { throw InternalException("Failed to cast constraint to type - bound constraint type mismatch"); } return reinterpret_cast(*this); } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/column_dependency_manager.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/stack.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::stack; } //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/index_map.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct LogicalIndexHashFunction { uint64_t operator()(const LogicalIndex &index) const { return std::hash()(index.index); } }; struct PhysicalIndexHashFunction { uint64_t operator()(const PhysicalIndex &index) const { return std::hash()(index.index); } }; template using logical_index_map_t = unordered_map; using logical_index_set_t = unordered_set; template using physical_index_map_t = unordered_map; using physical_index_set_t = unordered_set; } // namespace duckdb namespace duckdb { //! 
Dependency Manager local to a table, responsible for keeping track of generated column dependencies class ColumnDependencyManager { public: DUCKDB_API ColumnDependencyManager(); DUCKDB_API ~ColumnDependencyManager(); ColumnDependencyManager(ColumnDependencyManager &&other) = default; ColumnDependencyManager(const ColumnDependencyManager &other) = delete; public: //! NOTE(review): several types below (e.g. `stack`, `vector`) appear without template argument lists - presumably stripped when this copy was extracted; confirm against the upstream header //! Get the bind order that ensures dependencies are resolved before dependents are stack GetBindOrder(const ColumnList &columns); //! Adds a connection between the dependent and its dependencies void AddGeneratedColumn(LogicalIndex index, const vector &indices, bool root = true); //! Add a generated column from a column definition void AddGeneratedColumn(const ColumnDefinition &column, const ColumnList &list); //! Removes the column(s) and outputs the new column indices vector RemoveColumn(LogicalIndex index, idx_t column_amount); bool IsDependencyOf(LogicalIndex dependent, LogicalIndex dependency) const; bool HasDependencies(LogicalIndex index) const; const logical_index_set_t &GetDependencies(LogicalIndex index) const; bool HasDependents(LogicalIndex index) const; const logical_index_set_t &GetDependents(LogicalIndex index) const; private: void RemoveStandardColumn(LogicalIndex index); void RemoveGeneratedColumn(LogicalIndex index); void AdjustSingle(LogicalIndex idx, idx_t offset); // Clean up the gaps created by a Remove operation vector CleanupInternals(idx_t column_amount); private: //! A map of column dependency to generated column(s) logical_index_map_t dependencies_map; //! A map of generated column name to (potentially generated) column dependencies logical_index_map_t dependents_map; //! 
For resolve-order purposes, keep track of the 'direct' (not inherited) dependencies of a generated column logical_index_map_t direct_dependencies; logical_index_set_t deleted_columns; }; } // namespace duckdb namespace duckdb { // Forward declarations class DataTable; struct CreateTableInfo; struct BoundCreateTableInfo; struct RenameColumnInfo; struct AddColumnInfo; struct RemoveColumnInfo; struct SetDefaultInfo; struct ChangeColumnTypeInfo; struct AlterForeignKeyInfo; struct SetNotNullInfo; struct DropNotNullInfo; class TableFunction; struct FunctionData; class TableColumnInfo; struct ColumnSegmentInfo; class TableStorageInfo; class LogicalGet; class LogicalProjection; class LogicalUpdate; //! A table catalog entry class TableCatalogEntry : public StandardEntry { public: static constexpr const CatalogType Type = CatalogType::TABLE_ENTRY; static constexpr const char *Name = "table"; public: //! Create a TableCatalogEntry and initialize storage for it DUCKDB_API TableCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateTableInfo &info); public: DUCKDB_API bool HasGeneratedColumns() const; //! Returns whether or not a column with the given name exists DUCKDB_API bool ColumnExists(const string &name); //! Returns a reference to the column of the specified name. Throws an //! exception if the column does not exist. DUCKDB_API const ColumnDefinition &GetColumn(const string &name); //! Returns a reference to the column of the specified logical index. Throws an //! exception if the column does not exist. DUCKDB_API const ColumnDefinition &GetColumn(LogicalIndex idx); //! Returns a list of types of the table, excluding generated columns DUCKDB_API vector GetTypes(); //! Returns a list of the columns of the table DUCKDB_API const ColumnList &GetColumns() const; //! Returns a mutable list of the columns of the table DUCKDB_API ColumnList &GetColumnsMutable(); //! Returns the underlying storage of the table virtual DataTable &GetStorage(); //! 
Returns a list of the bound constraints of the table virtual const vector> &GetBoundConstraints(); //! Returns a list of the constraints of the table DUCKDB_API const vector> &GetConstraints(); DUCKDB_API string ToSQL() const override; //! Get statistics of a column (physical or virtual) within the table virtual unique_ptr GetStatistics(ClientContext &context, column_t column_id) = 0; //! Serialize the meta information of the TableCatalogEntry to a serializer void Serialize(Serializer &serializer) const; //! Deserializes to a CreateTableInfo static unique_ptr Deserialize(Deserializer &source, ClientContext &context); //! Returns the column index of the specified column name. //! If the column does not exist: //! If if_exists is true, returns DConstants::INVALID_INDEX //! If if_exists is false, throws an exception DUCKDB_API LogicalIndex GetColumnIndex(string &name, bool if_exists = false); //! Returns the scan function that can be used to scan the given table virtual TableFunction GetScanFunction(ClientContext &context, unique_ptr &bind_data) = 0; //! Returns false in this base class; virtual so that subclasses can override it virtual bool IsDuckTable() const { return false; } DUCKDB_API static string ColumnsToSQL(const ColumnList &columns, const vector> &constraints); //! Returns a list of segment information for this table, if any exists virtual vector GetColumnSegmentInfo(); //! Returns the storage info of this table virtual TableStorageInfo GetStorageInfo(ClientContext &context) = 0; virtual void BindUpdateConstraints(LogicalGet &get, LogicalProjection &proj, LogicalUpdate &update, ClientContext &context); protected: // This is used to serialize the entry by #Serialize(Serializer& ). It is virtual to allow // custom catalog implementations to override the default implementation. We can not make // the Serialize method itself virtual as the logic is tightly coupled to the static // Deserialize method. virtual CreateTableInfo GetTableInfoForSerialization() const; //! 
A list of columns that are part of this table ColumnList columns; //! 
A list of constraints that are part of this table vector> constraints; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! A table function in the catalog class TableFunctionCatalogEntry : public FunctionEntry { public: static constexpr const CatalogType Type = CatalogType::TABLE_FUNCTION_ENTRY; static constexpr const char *Name = "table function"; public: TableFunctionCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateTableFunctionInfo &info); //! The table function set TableFunctionSet functions; public: unique_ptr AlterEntry(ClientContext &context, AlterInfo &info) override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/view_catalog_entry.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class DataTable; struct CreateViewInfo; //! A view catalog entry class ViewCatalogEntry : public StandardEntry { public: static constexpr const CatalogType Type = CatalogType::VIEW_ENTRY; static constexpr const char *Name = "view"; public: //! Create a ViewCatalogEntry from the given CreateViewInfo ViewCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateViewInfo &info); //! The query of the view unique_ptr query; //! The SQL query (if any) string sql; //! The set of aliases associated with the view vector aliases; //! The returned types of the view vector types; public: unique_ptr AlterEntry(ClientContext &context, AlterInfo &info) override; //! Serialize the meta information of the ViewCatalogEntry to a serializer virtual void Serialize(Serializer &serializer) const; //! 
Deserializes to a CreateTableInfo static unique_ptr Deserialize(Deserializer &source, ClientContext &context); unique_ptr Copy(ClientContext &context) const override; string ToSQL() const override; private: void Initialize(CreateViewInfo &info); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/default/default_schemas.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class DefaultSchemaGenerator : public DefaultGenerator { public: explicit DefaultSchemaGenerator(Catalog &catalog); public: unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; vector GetDefaultEntries() override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/type_catalog_entry.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/create_type_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct CreateTypeInfo : public CreateInfo { CreateTypeInfo(); CreateTypeInfo(string name_p, LogicalType type_p); //! Name of the Type string name; //! Logical Type LogicalType type; //! Used by create enum from query unique_ptr query; public: unique_ptr Copy() const override; DUCKDB_API static unique_ptr Deserialize(Deserializer &deserializer); protected: void SerializeInternal(Serializer &) const override; }; } // namespace duckdb namespace duckdb { class Serializer; class Deserializer; //! A type catalog entry class TypeCatalogEntry : public StandardEntry { public: static constexpr const CatalogType Type = CatalogType::TYPE_ENTRY; static constexpr const char *Name = "type"; public: //! 
Create a TypeCatalogEntry and initialize storage for it TypeCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateTypeInfo &info); LogicalType user_type; public: //! Serialize the meta information of the TypeCatalogEntry to a serializer virtual void Serialize(Serializer &serializer) const; //! Deserializes to a TypeCatalogEntry static unique_ptr Deserialize(Deserializer &source); string ToSQL() const override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/client_data.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class AttachedDatabase; class BufferedFileWriter; class ClientContext; class CatalogSearchPath; class FileOpener; class FileSystem; class HTTPState; class QueryProfiler; class QueryProfilerHistory; class PreparedStatementData; class SchemaCatalogEntry; struct RandomEngine; struct ClientData { ClientData(ClientContext &context); ~ClientData(); //! Query profiler shared_ptr profiler; //! QueryProfiler History unique_ptr query_profiler_history; //! The set of temporary objects that belong to this client shared_ptr temporary_objects; //! The set of bound prepared statements that belong to this client case_insensitive_map_t> prepared_statements; //! The writer used to log queries (if logging is enabled) unique_ptr log_query_writer; //! The random generator used by random(). Its seed value can be set by setseed(). unique_ptr random_engine; //! The catalog search path unique_ptr catalog_search_path; //! The file opener of the client context unique_ptr file_opener; //! HTTP State in this query shared_ptr http_state; //! The client's file system wrapper unique_ptr client_file_system; //! The file search path string file_search_path; //! The Max Line Length Size of Last Query Executed on a CSV File. (Only used for testing) //! 
FIXME: this should not be done like this bool debug_set_max_line_length = false; idx_t debug_max_line_length = 0; public: DUCKDB_API static ClientData &Get(ClientContext &context); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/expression/function_expression.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! Represents a function call class FunctionExpression : public ParsedExpression { public: static constexpr const ExpressionClass TYPE = ExpressionClass::FUNCTION; public: DUCKDB_API FunctionExpression(string catalog_name, string schema_name, const string &function_name, vector> children, unique_ptr filter = nullptr, unique_ptr order_bys = nullptr, bool distinct = false, bool is_operator = false, bool export_state = false); DUCKDB_API FunctionExpression(const string &function_name, vector> children, unique_ptr filter = nullptr, unique_ptr order_bys = nullptr, bool distinct = false, bool is_operator = false, bool export_state = false); //! Catalog of the function string catalog; //! Schema of the function string schema; //! Function name string function_name; //! Whether or not the function is an operator, only used for rendering bool is_operator; //! List of arguments to the function vector> children; //! Whether or not the aggregate function is distinct, only used for aggregates bool distinct; //! Expression representing a filter, only used for aggregates unique_ptr filter; //! Modifier representing an ORDER BY, only used for aggregates unique_ptr order_bys; //! 
whether this function should export its state or not bool export_state; public: string ToString() const override; unique_ptr Copy() const override; static bool Equal(const FunctionExpression &a, const FunctionExpression &b); hash_t Hash() const override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(ExpressionType type, FieldReader &source); void FormatSerialize(FormatSerializer &serializer) const override; static unique_ptr FormatDeserialize(ExpressionType type, FormatDeserializer &deserializer); void Verify() const override; public: template static string ToString(const T &entry, const string &schema, const string &function_name, bool is_operator = false, bool distinct = false, BASE *filter = nullptr, ORDER_MODIFIER *order_bys = nullptr, bool export_state = false, bool add_alias = false) { if (is_operator) { // built-in operator D_ASSERT(!distinct); if (entry.children.size() == 1) { // unary operator: a "__postfix" marker in the name selects postfix rendering and is removed from the output; otherwise the operator is rendered as name(child) if (StringUtil::Contains(function_name, "__postfix")) { return "((" + entry.children[0]->ToString() + ")" + StringUtil::Replace(function_name, "__postfix", "") + ")"; } else { return function_name + "(" + entry.children[0]->ToString() + ")"; } } else if (entry.children.size() == 2) { // binary operator: rendered infix as (lhs op rhs) return StringUtil::Format("(%s %s %s)", entry.children[0]->ToString(), function_name, entry.children[1]->ToString()); } // operators with any other number of children fall through to the standard call syntax below } // standard function call; the name is qualified with the schema only when a schema is given string result = schema.empty() ? function_name : schema + "." 
+ function_name; result += "("; if (distinct) { result += "DISTINCT "; } // arguments are comma-separated; a child is rendered as "alias := value" only when add_alias is set and the child has an alias result += StringUtil::Join(entry.children, entry.children.size(), ", ", [&](const unique_ptr &child) { return child->alias.empty() || !add_alias ? 
child->ToString() : StringUtil::Format("%s := %s", SQLIdentifier(child->alias), child->ToString()); }); // ordered aggregate if (order_bys && !order_bys->orders.empty()) { if (entry.children.empty()) { // no arguments: close the argument list and open a WITHIN GROUP clause for the ordering result += ") WITHIN GROUP ("; } result += " ORDER BY "; for (idx_t i = 0; i < order_bys->orders.size(); i++) { if (i > 0) { result += ", "; } result += order_bys->orders[i].ToString(); } } result += ")"; // filtered aggregate if (filter) { result += " FILTER (WHERE " + filter->ToString() + ")"; } if (export_state) { result += " EXPORT_STATE"; } return result; } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/create_pragma_function_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct CreatePragmaFunctionInfo : public CreateFunctionInfo { DUCKDB_API explicit CreatePragmaFunctionInfo(PragmaFunction function); DUCKDB_API CreatePragmaFunctionInfo(string name, PragmaFunctionSet functions_); PragmaFunctionSet functions; public: DUCKDB_API unique_ptr Copy() const override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/create_schema_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct CreateSchemaInfo : public CreateInfo { CreateSchemaInfo() : CreateInfo(CatalogType::SCHEMA_ENTRY) { } public: unique_ptr Copy() const override { auto result = make_uniq(); CopyProperties(*result); return std::move(result); } static unique_ptr Deserialize(Deserializer &deserializer) { auto result = make_uniq(); result->DeserializeBase(deserializer); return result; } protected: void SerializeInternal(Serializer &) const override { // empty body: nothing is written here 
} }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // 
duckdb/parser/parsed_data/create_view_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class SchemaCatalogEntry; struct CreateViewInfo : public CreateInfo { CreateViewInfo(); CreateViewInfo(SchemaCatalogEntry &schema, string view_name); CreateViewInfo(string catalog_p, string schema_p, string view_name); //! Name of the view string view_name; //! Aliases of the view vector aliases; //! Return types vector types; //! The SelectStatement of the view unique_ptr query; public: unique_ptr Copy() const override; static unique_ptr Deserialize(Deserializer &deserializer); //! Gets a bound CreateViewInfo object from a SELECT statement and a view name, schema name, etc DUCKDB_API static unique_ptr FromSelect(ClientContext &context, unique_ptr info); //! Gets a bound CreateViewInfo object from a CREATE VIEW statement DUCKDB_API static unique_ptr FromCreateView(ClientContext &context, const string &sql); protected: void SerializeInternal(Serializer &serializer) const override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/drop_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct DropInfo : public ParseInfo { DropInfo(); //! The catalog type to drop CatalogType type; //! Catalog name to drop from, if any string catalog; //! Schema name to drop from, if any string schema; //! Element name to drop string name; //! Ignore if the entry does not exist instead of failing OnEntryNotFound if_not_found = OnEntryNotFound::THROW_EXCEPTION; //! Cascade drop (drop all dependents instead of throwing an error if there //! are any) bool cascade = false; //! 
Allow dropping of internal system entries bool allow_drop_internal = false; public: unique_ptr Copy() const; void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/parsed_data/bound_create_table_info.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/create_table_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class SchemaCatalogEntry; struct CreateTableInfo : public CreateInfo { DUCKDB_API CreateTableInfo(); DUCKDB_API CreateTableInfo(string catalog, string schema, string name); DUCKDB_API CreateTableInfo(SchemaCatalogEntry &schema, string name); //! Table name to insert to string table; //! List of columns of the table ColumnList columns; //! List of constraints on the table vector> constraints; //! 
CREATE TABLE from QUERY unique_ptr query; protected: void SerializeInternal(Serializer &serializer) const override; public: DUCKDB_API static unique_ptr Deserialize(Deserializer &deserializer); DUCKDB_API unique_ptr Copy() const override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table/persistent_table_data.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/data_pointer.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table/row_group.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table/chunk_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class RowGroup; struct SelectionVector; class Transaction; struct TransactionData; enum class ChunkInfoType : uint8_t { CONSTANT_INFO, VECTOR_INFO, EMPTY_INFO }; class ChunkInfo { public: ChunkInfo(idx_t start, ChunkInfoType type) : start(start), type(type) { } virtual ~ChunkInfo() { } //! The row index of the first row idx_t start; //! The ChunkInfo type ChunkInfoType type; public: //! Gets up to max_count entries from the chunk info. If the returned count is strictly between 0 and max_count (presumably - the original wording `0>ret>max_count` cannot hold; TODO confirm against the implementations), the selection vector is filled //! 
with the tuples virtual idx_t GetSelVector(TransactionData transaction, SelectionVector &sel_vector, idx_t max_count) = 0; virtual idx_t GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id, SelectionVector &sel_vector, idx_t max_count) = 0; //! 
Returns whether or not a single row in the ChunkInfo should be used or not for the given transaction virtual bool Fetch(TransactionData transaction, row_t row) = 0; virtual void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) = 0; virtual void Serialize(Serializer &serialize) = 0; static unique_ptr Deserialize(Deserializer &source); public: template TARGET &Cast() { if (type != TARGET::TYPE) { throw InternalException("Failed to cast chunk info to type - query result type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (type != TARGET::TYPE) { throw InternalException("Failed to cast chunk info to type - query result type mismatch"); } return reinterpret_cast(*this); } }; class ChunkConstantInfo : public ChunkInfo { public: static constexpr const ChunkInfoType TYPE = ChunkInfoType::CONSTANT_INFO; public: explicit ChunkConstantInfo(idx_t start); atomic insert_id; atomic delete_id; public: idx_t GetSelVector(TransactionData transaction, SelectionVector &sel_vector, idx_t max_count) override; idx_t GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id, SelectionVector &sel_vector, idx_t max_count) override; bool Fetch(TransactionData transaction, row_t row) override; void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override; void Serialize(Serializer &serialize) override; static unique_ptr Deserialize(Deserializer &source); private: template idx_t TemplatedGetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector, idx_t max_count); }; class ChunkVectorInfo : public ChunkInfo { public: static constexpr const ChunkInfoType TYPE = ChunkInfoType::VECTOR_INFO; public: explicit ChunkVectorInfo(idx_t start); //! The transaction ids of the transactions that inserted the tuples (if any) atomic inserted[STANDARD_VECTOR_SIZE]; atomic insert_id; atomic same_inserted_id; //! 
The transaction ids of the transactions that deleted the tuples (if any) atomic deleted[STANDARD_VECTOR_SIZE]; atomic any_deleted; public: idx_t GetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector, idx_t max_count); idx_t GetSelVector(TransactionData transaction, SelectionVector &sel_vector, idx_t max_count) override; idx_t GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id, SelectionVector &sel_vector, idx_t max_count) override; bool Fetch(TransactionData transaction, row_t row) override; void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override; void Append(idx_t start, idx_t end, transaction_t commit_id); //! Performs a delete in the ChunkVectorInfo - returns how many tuples were actually deleted //! The number of rows that were actually deleted might be lower than the input count //! In case we delete rows that were already deleted //! Note that "rows" is written to in order to reflect the row ids that were actually deleted //! i.e. after calling this function, rows will hold [0..actual_delete_count] row ids of the actually deleted tuples idx_t Delete(transaction_t transaction_id, row_t rows[], idx_t count); void CommitDelete(transaction_t commit_id, row_t rows[], idx_t count); void Serialize(Serializer &serialize) override; static unique_ptr Deserialize(Deserializer &source); private: template idx_t TemplatedGetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector, idx_t max_count); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/statistics/segment_statistics.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class SegmentStatistics { public: SegmentStatistics(LogicalType type); SegmentStatistics(BaseStatistics statistics); //! 
Type-specific statistics of the segment BaseStatistics statistics; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/scan_options.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class TableScanType : uint8_t { //! Regular table scan: scan all tuples that are relevant for the current transaction TABLE_SCAN_REGULAR = 0, //! Scan all rows, including any deleted rows. Committed updates are merged in. TABLE_SCAN_COMMITTED_ROWS = 1, //! Scan all rows, including any deleted rows. Throws an exception if there are any uncommitted updates. TABLE_SCAN_COMMITTED_ROWS_DISALLOW_UPDATES = 2, //! Scan all rows, excluding any permanently deleted rows. //! Permanently deleted rows are rows which no transaction will ever need again. TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED = 3 }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table/segment_base.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { template class SegmentBase { public: //! Initializes start, count and next (to nullptr); NOTE(review): the `index` member is not initialized by this constructor SegmentBase(idx_t start, idx_t count) : start(start), count(count), next(nullptr) { } //! Returns the next segment; reads it via next.load() unless DUCKDB_R_BUILD is defined, in which case `next` is a plain pointer T *Next() { #ifndef DUCKDB_R_BUILD return next.load(); #else return next; #endif } //! The start row id of this chunk idx_t start; //! The amount of entries in this storage chunk atomic count; //! The next segment after this one #ifndef DUCKDB_R_BUILD atomic next; #else T *next; #endif //! 
The index within the segment tree idx_t index; }; } // namespace duckdb namespace duckdb { class AttachedDatabase; class BlockManager; class ColumnData; class DatabaseInstance; class DataTable; class PartialBlockManager; struct DataTableInfo; class ExpressionExecutor; class RowGroupCollection; class RowGroupWriter; class UpdateSegment; class TableStatistics; struct ColumnSegmentInfo; class Vector; struct ColumnCheckpointState; struct RowGroupPointer; struct TransactionData; struct VersionNode; class CollectionScanState; class TableFilterSet; struct ColumnFetchState; struct RowGroupAppendState; struct RowGroupWriteData { vector> states; vector statistics; }; class RowGroup : public SegmentBase { public: friend class ColumnData; friend class VersionDeleteState; public: static constexpr const idx_t ROW_GROUP_SIZE = STANDARD_ROW_GROUPS_SIZE; static constexpr const idx_t ROW_GROUP_VECTOR_COUNT = ROW_GROUP_SIZE / STANDARD_VECTOR_SIZE; public: RowGroup(RowGroupCollection &collection, idx_t start, idx_t count); RowGroup(RowGroupCollection &collection, RowGroupPointer &&pointer); ~RowGroup(); private: //! The RowGroupCollection this row-group is a part of reference collection; //! The version info of the row_group (inserted and deleted tuple info) shared_ptr version_info; //! 
The column data of the row_group vector> columns; public: void MoveToCollection(RowGroupCollection &collection, idx_t new_start); RowGroupCollection &GetCollection() { return collection.get(); } DatabaseInstance &GetDatabase(); BlockManager &GetBlockManager(); DataTableInfo &GetTableInfo(); unique_ptr AlterType(RowGroupCollection &collection, const LogicalType &target_type, idx_t changed_idx, ExpressionExecutor &executor, CollectionScanState &scan_state, DataChunk &scan_chunk); unique_ptr AddColumn(RowGroupCollection &collection, ColumnDefinition &new_column, ExpressionExecutor &executor, Expression *default_value, Vector &intermediate); unique_ptr RemoveColumn(RowGroupCollection &collection, idx_t removed_column); void CommitDrop(); void CommitDropColumn(idx_t index); void InitializeEmpty(const vector &types); //! Initialize a scan over this row_group bool InitializeScan(CollectionScanState &state); bool InitializeScanWithOffset(CollectionScanState &state, idx_t vector_offset); //! Checks the given set of table filters against the row-group statistics. Returns false if the entire row group //! can be skipped. bool CheckZonemap(TableFilterSet &filters, const vector &column_ids); //! Checks the given set of table filters against the per-segment statistics. Returns false if any segments were //! skipped. bool CheckZonemapSegments(CollectionScanState &state); void Scan(TransactionData transaction, CollectionScanState &state, DataChunk &result); void ScanCommitted(CollectionScanState &state, DataChunk &result, TableScanType type); idx_t GetSelVector(TransactionData transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count); idx_t GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count); //! For a specific row, returns true if it should be used for the transaction and false otherwise. bool Fetch(TransactionData transaction, idx_t row); //! 
Fetch a specific row from the row_group and insert it into the result at the specified index void FetchRow(TransactionData transaction, ColumnFetchState &state, const vector &column_ids, row_t row_id, DataChunk &result, idx_t result_idx); //! Append count rows to the version info void AppendVersionInfo(TransactionData transaction, idx_t count); //! Commit a previous append made by RowGroup::AppendVersionInfo void CommitAppend(transaction_t commit_id, idx_t start, idx_t count); //! Revert a previous append made by RowGroup::AppendVersionInfo void RevertAppend(idx_t start); //! Delete the given set of rows in the version manager idx_t Delete(TransactionData transaction, DataTable &table, row_t *row_ids, idx_t count); RowGroupWriteData WriteToDisk(PartialBlockManager &manager, const vector &compression_types); RowGroupPointer Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats); static void Serialize(RowGroupPointer &pointer, Serializer &serializer); static RowGroupPointer Deserialize(Deserializer &source, const vector &columns); void InitializeAppend(RowGroupAppendState &append_state); void Append(RowGroupAppendState &append_state, DataChunk &chunk, idx_t append_count); void Update(TransactionData transaction, DataChunk &updates, row_t *ids, idx_t offset, idx_t count, const vector &column_ids); //! Update a single column; corresponds to DataTable::UpdateColumn //! 
This method should only be called from the WAL void UpdateColumn(TransactionData transaction, DataChunk &updates, Vector &row_ids, const vector &column_path); void MergeStatistics(idx_t column_idx, const BaseStatistics &other); void MergeIntoStatistics(idx_t column_idx, BaseStatistics &other); unique_ptr GetStatistics(idx_t column_idx); void GetColumnSegmentInfo(idx_t row_group_index, vector &result); void Verify(); void NextVector(CollectionScanState &state); private: ChunkInfo *GetChunkInfo(idx_t vector_idx); ColumnData &GetColumn(storage_t c); idx_t GetColumnCount() const; vector> &GetColumns(); template void TemplatedScan(TransactionData transaction, CollectionScanState &state, DataChunk &result); static void CheckpointDeletes(VersionNode *versions, Serializer &serializer); static shared_ptr DeserializeDeletes(Deserializer &source); private: mutex row_group_lock; mutex stats_lock; vector column_pointers; unique_ptr[]> is_loaded; }; struct VersionNode { unique_ptr info[RowGroup::ROW_GROUP_VECTOR_COUNT]; void SetStart(idx_t start); }; } // namespace duckdb namespace duckdb { struct DataPointer { //! Moves the given statistics into `statistics`; NOTE(review): this constructor initializes no other member explicitly DataPointer(BaseStatistics stats) : statistics(std::move(stats)) { } uint64_t row_start; uint64_t tuple_count; BlockPointer block_pointer; CompressionType compression_type; //! Type-specific statistics of the segment BaseStatistics statistics; }; struct RowGroupPointer { uint64_t row_start; uint64_t tuple_count; //! The data pointers of the column segments stored in the row group vector data_pointers; //! 
The versions information of the row group (if any) shared_ptr versions; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table/table_statistics.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/statistics/column_statistics.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class ColumnStatistics { public: explicit ColumnStatistics(BaseStatistics stats_p); ColumnStatistics(BaseStatistics stats_p, unique_ptr distinct_stats_p); public: static shared_ptr CreateEmptyStats(const LogicalType &type); void Merge(ColumnStatistics &other); void UpdateDistinctStatistics(Vector &v, idx_t count); BaseStatistics &Statistics(); bool HasDistinctStats(); DistinctStatistics &DistinctStats(); void SetDistinct(unique_ptr distinct_stats); shared_ptr Copy() const; void Serialize(Serializer &serializer) const; static shared_ptr Deserialize(Deserializer &source, const LogicalType &type); private: BaseStatistics stats; //! 
The approximate count distinct stats of the column unique_ptr distinct_stats; }; } // namespace duckdb namespace duckdb { class ColumnList; class PersistentTableData; //! Holds a lock_guard constructed on the given mutex for as long as this object exists class TableStatisticsLock { public: TableStatisticsLock(mutex &l) : guard(l) { } lock_guard guard; }; class TableStatistics { public: void Initialize(const vector &types, PersistentTableData &data); void InitializeEmpty(const vector &types); void InitializeAddColumn(TableStatistics &parent, const LogicalType &new_column_type); void InitializeRemoveColumn(TableStatistics &parent, idx_t removed_column); void InitializeAlterType(TableStatistics &parent, idx_t changed_idx, const LogicalType &new_type); void InitializeAddConstraint(TableStatistics &parent); void MergeStats(TableStatistics &other); void MergeStats(idx_t i, BaseStatistics &stats); void MergeStats(TableStatisticsLock &lock, idx_t i, BaseStatistics &stats); void CopyStats(TableStatistics &other); unique_ptr CopyStats(idx_t i); ColumnStatistics &GetStats(idx_t i); bool Empty(); unique_ptr GetLock(); void Serialize(Serializer &serializer); void Deserialize(Deserializer &source, ColumnList &columns); private: //! The statistics lock mutex stats_lock; //! 
Column statistics vector> column_stats; }; } // namespace duckdb namespace duckdb { class BaseStatistics; class PersistentTableData { public: explicit PersistentTableData(idx_t column_count); ~PersistentTableData(); TableStatistics table_stats; idx_t total_rows; idx_t row_group_count; block_id_t block_id; idx_t offset; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table_index_list.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/index.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/sort/sort.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/sort/sorted_block.hpp // // //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/fast_mem.hpp // // //===----------------------------------------------------------------------===// // Wrappers around memcpy / memcmp / memset that pass SIZE as the byte count. NOTE(review): the `template` headers below lack a parameter list, so SIZE is undeclared in this copy - presumably stripped during extraction; confirm against the upstream header template static inline void MemcpyFixed(void *dest, const void *src) { memcpy(dest, src, SIZE); } template static inline int MemcmpFixed(const void *str1, const void *str2) { return memcmp(str1, str2, SIZE); } template static inline void MemsetFixed(void *ptr, int value) { memset(ptr, value, SIZE); } namespace duckdb { //! This templated memcpy is significantly faster than std::memcpy, //! 
but only when you are calling memcpy with a const size in a loop. //! For instance `while () { memcpy(, , const_size); ... 
//! }`
//!
//! Copies `size` bytes from `src` to `dest`.
//! Every size from 1 up to and including 256 gets its own switch case that calls memcpy with a literal
//! length; a size of 0 returns immediately and anything above 256 uses a regular runtime-sized memcpy.
static inline void FastMemcpy(void *dest, const void *src, const size_t size) {
	// LCOV_EXCL_START
	switch (size) {
	case 0: return;
	case 1: memcpy(dest, src, 1); return;
	case 2: memcpy(dest, src, 2); return;
	case 3: memcpy(dest, src, 3); return;
	case 4: memcpy(dest, src, 4); return;
	case 5: memcpy(dest, src, 5); return;
	case 6: memcpy(dest, src, 6); return;
	case 7: memcpy(dest, src, 7); return;
	case 8: memcpy(dest, src, 8); return;
	case 9: memcpy(dest, src, 9); return;
	case 10: memcpy(dest, src, 10); return;
	case 11: memcpy(dest, src, 11); return;
	case 12: memcpy(dest, src, 12); return;
	case 13: memcpy(dest, src, 13); return;
	case 14: memcpy(dest, src, 14); return;
	case 15: memcpy(dest, src, 15); return;
	case 16: memcpy(dest, src, 16); return;
	case 17: memcpy(dest, src, 17); return;
	case 18: memcpy(dest, src, 18); return;
	case 19: memcpy(dest, src, 19); return;
	case 20: memcpy(dest, src, 20); return;
	case 21: memcpy(dest, src, 21); return;
	case 22: memcpy(dest, src, 22); return;
	case 23: memcpy(dest, src, 23); return;
	case 24: memcpy(dest, src, 24); return;
	case 25: memcpy(dest, src, 25); return;
	case 26: memcpy(dest, src, 26); return;
	case 27: memcpy(dest, src, 27); return;
	case 28: memcpy(dest, src, 28); return;
	case 29: memcpy(dest, src, 29); return;
	case 30: memcpy(dest, src, 30); return;
	case 31: memcpy(dest, src, 31); return;
	case 32: memcpy(dest, src, 32); return;
	case 33: memcpy(dest, src, 33); return;
	case 34: memcpy(dest, src, 34); return;
	case 35: memcpy(dest, src, 35); return;
	case 36: memcpy(dest, src, 36); return;
	case 37: memcpy(dest, src, 37); return;
	case 38: memcpy(dest, src, 38); return;
	case 39: memcpy(dest, src, 39); return;
	case 40: memcpy(dest, src, 40); return;
	case 41: memcpy(dest, src, 41); return;
	case 42: memcpy(dest, src, 42); return;
	case 43: memcpy(dest, src, 43); return;
	case 44: memcpy(dest, src, 44); return;
	case 45: memcpy(dest, src, 45); return;
	case 46: memcpy(dest, src, 46); return;
	case 47: memcpy(dest, src, 47); return;
	case 48: memcpy(dest, src, 48); return;
	case 49: memcpy(dest, src, 49); return;
	case 50: memcpy(dest, src, 50); return;
	case 51: memcpy(dest, src, 51); return;
	case 52: memcpy(dest, src, 52); return;
	case 53: memcpy(dest, src, 53); return;
	case 54: memcpy(dest, src, 54); return;
	case 55: memcpy(dest, src, 55); return;
	case 56: memcpy(dest, src, 56); return;
	case 57: memcpy(dest, src, 57); return;
	case 58: memcpy(dest, src, 58); return;
	case 59: memcpy(dest, src, 59); return;
	case 60: memcpy(dest, src, 60); return;
	case 61: memcpy(dest, src, 61); return;
	case 62: memcpy(dest, src, 62); return;
	case 63: memcpy(dest, src, 63); return;
	case 64: memcpy(dest, src, 64); return;
	case 65: memcpy(dest, src, 65); return;
	case 66: memcpy(dest, src, 66); return;
	case 67: memcpy(dest, src, 67); return;
	case 68: memcpy(dest, src, 68); return;
	case 69: memcpy(dest, src, 69); return;
	case 70: memcpy(dest, src, 70); return;
	case 71: memcpy(dest, src, 71); return;
	case 72: memcpy(dest, src, 72); return;
	case 73: memcpy(dest, src, 73); return;
	case 74: memcpy(dest, src, 74); return;
	case 75: memcpy(dest, src, 75); return;
	case 76: memcpy(dest, src, 76); return;
	case 77: memcpy(dest, src, 77); return;
	case 78: memcpy(dest, src, 78); return;
	case 79: memcpy(dest, src, 79); return;
	case 80: memcpy(dest, src, 80); return;
	case 81: memcpy(dest, src, 81); return;
	case 82: memcpy(dest, src, 82); return;
	case 83: memcpy(dest, src, 83); return;
	case 84: memcpy(dest, src, 84); return;
	case 85: memcpy(dest, src, 85); return;
	case 86: memcpy(dest, src, 86); return;
	case 87: memcpy(dest, src, 87); return;
	case 88: memcpy(dest, src, 88); return;
	case 89: memcpy(dest, src, 89); return;
	case 90: memcpy(dest, src, 90); return;
	case 91: memcpy(dest, src, 91); return;
	case 92: memcpy(dest, src, 92); return;
	case 93: memcpy(dest, src, 93); return;
	case 94: memcpy(dest, src, 94); return;
	case 95: memcpy(dest, src, 95); return;
	case 96: memcpy(dest, src, 96); return;
	case 97: memcpy(dest, src, 97); return;
	case 98: memcpy(dest, src, 98); return;
	case 99: memcpy(dest, src, 99); return;
	case 100: memcpy(dest, src, 100); return;
	case 101: memcpy(dest, src, 101); return;
	case 102: memcpy(dest, src, 102); return;
	case 103: memcpy(dest, src, 103); return;
	case 104: memcpy(dest, src, 104); return;
	case 105: memcpy(dest, src, 105); return;
	case 106: memcpy(dest, src, 106); return;
	case 107: memcpy(dest, src, 107); return;
	case 108: memcpy(dest, src, 108); return;
	case 109: memcpy(dest, src, 109); return;
	case 110: memcpy(dest, src, 110); return;
	case 111: memcpy(dest, src, 111); return;
	case 112: memcpy(dest, src, 112); return;
	case 113: memcpy(dest, src, 113); return;
	case 114: memcpy(dest, src, 114); return;
	case 115: memcpy(dest, src, 115); return;
	case 116: memcpy(dest, src, 116); return;
	case 117: memcpy(dest, src, 117); return;
	case 118: memcpy(dest, src, 118); return;
	case 119: memcpy(dest, src, 119); return;
	case 120: memcpy(dest, src, 120); return;
	case 121: memcpy(dest, src, 121); return;
	case 122: memcpy(dest, src, 122); return;
	case 123: memcpy(dest, src, 123); return;
	case 124: memcpy(dest, src, 124); return;
	case 125: memcpy(dest, src, 125); return;
	case 126: memcpy(dest, src, 126); return;
	case 127: memcpy(dest, src, 127); return;
	case 128: memcpy(dest, src, 128); return;
	case 129: memcpy(dest, src, 129); return;
	case 130: memcpy(dest, src, 130); return;
	case 131: memcpy(dest, src, 131); return;
	case 132: memcpy(dest, src, 132); return;
	case 133: memcpy(dest, src, 133); return;
	case 134: memcpy(dest, src, 134); return;
	case 135: memcpy(dest, src, 135); return;
	case 136: memcpy(dest, src, 136); return;
	case 137: memcpy(dest, src, 137); return;
	case 138: memcpy(dest, src, 138); return;
	case 139: memcpy(dest, src, 139); return;
	case 140: memcpy(dest, src, 140); return;
	case 141: memcpy(dest, src, 141); return;
	case 142: memcpy(dest, src, 142); return;
	case 143: memcpy(dest, src, 143); return;
	case 144: memcpy(dest, src, 144); return;
	case 145: memcpy(dest, src, 145); return;
	case 146: memcpy(dest, src, 146); return;
	case 147: memcpy(dest, src, 147); return;
	case 148: memcpy(dest, src, 148); return;
	case 149: memcpy(dest, src, 149); return;
	case 150: memcpy(dest, src, 150); return;
	case 151: memcpy(dest, src, 151); return;
	case 152: memcpy(dest, src, 152); return;
	case 153: memcpy(dest, src, 153); return;
	case 154: memcpy(dest, src, 154); return;
	case 155: memcpy(dest, src, 155); return;
	case 156: memcpy(dest, src, 156); return;
	case 157: memcpy(dest, src, 157); return;
	case 158: memcpy(dest, src, 158); return;
	case 159: memcpy(dest, src, 159); return;
	case 160: memcpy(dest, src, 160); return;
	case 161: memcpy(dest, src, 161); return;
	case 162: memcpy(dest, src, 162); return;
	case 163: memcpy(dest, src, 163); return;
	case 164: memcpy(dest, src, 164); return;
	case 165: memcpy(dest, src, 165); return;
	case 166: memcpy(dest, src, 166); return;
	case 167: memcpy(dest, src, 167); return;
	case 168: memcpy(dest, src, 168); return;
	case 169: memcpy(dest, src, 169); return;
	case 170: memcpy(dest, src, 170); return;
	case 171: memcpy(dest, src, 171); return;
	case 172: memcpy(dest, src, 172); return;
	case 173: memcpy(dest, src, 173); return;
	case 174: memcpy(dest, src, 174); return;
	case 175: memcpy(dest, src, 175); return;
	case 176: memcpy(dest, src, 176); return;
	case 177: memcpy(dest, src, 177); return;
	case 178: memcpy(dest, src, 178); return;
	case 179: memcpy(dest, src, 179); return;
	case 180: memcpy(dest, src, 180); return;
	case 181: memcpy(dest, src, 181); return;
	case 182: memcpy(dest, src, 182); return;
	case 183: memcpy(dest, src, 183); return;
	case 184: memcpy(dest, src, 184); return;
	case 185: memcpy(dest, src, 185); return;
	case 186: memcpy(dest, src, 186); return;
	case 187: memcpy(dest, src, 187); return;
	case 188: memcpy(dest, src, 188); return;
	case 189: memcpy(dest, src, 189); return;
	case 190: memcpy(dest, src, 190); return;
	case 191: memcpy(dest, src, 191); return;
	case 192: memcpy(dest, src, 192); return;
	case 193: memcpy(dest, src, 193); return;
	case 194: memcpy(dest, src, 194); return;
	case 195: memcpy(dest, src, 195); return;
	case 196: memcpy(dest, src, 196); return;
	case 197: memcpy(dest, src, 197); return;
	case 198: memcpy(dest, src, 198); return;
	case 199: memcpy(dest, src, 199); return;
	case 200: memcpy(dest, src, 200); return;
	case 201: memcpy(dest, src, 201); return;
	case 202: memcpy(dest, src, 202); return;
	case 203: memcpy(dest, src, 203); return;
	case 204: memcpy(dest, src, 204); return;
	case 205: memcpy(dest, src, 205); return;
	case 206: memcpy(dest, src, 206); return;
	case 207: memcpy(dest, src, 207); return;
	case 208: memcpy(dest, src, 208); return;
	case 209: memcpy(dest, src, 209); return;
	case 210: memcpy(dest, src, 210); return;
	case 211: memcpy(dest, src, 211); return;
	case 212: memcpy(dest, src, 212); return;
	case 213: memcpy(dest, src, 213); return;
	case 214: memcpy(dest, src, 214); return;
	case 215: memcpy(dest, src, 215); return;
	case 216: memcpy(dest, src, 216); return;
	case 217: memcpy(dest, src, 217); return;
	case 218: memcpy(dest, src, 218); return;
	case 219: memcpy(dest, src, 219); return;
	case 220: memcpy(dest, src, 220); return;
	case 221: memcpy(dest, src, 221); return;
	case 222: memcpy(dest, src, 222); return;
	case 223: memcpy(dest, src, 223); return;
	case 224: memcpy(dest, src, 224); return;
	case 225: memcpy(dest, src, 225); return;
	case 226: memcpy(dest, src, 226); return;
	case 227: memcpy(dest, src, 227); return;
	case 228: memcpy(dest, src, 228); return;
	case 229: memcpy(dest, src, 229); return;
	case 230: memcpy(dest, src, 230); return;
	case 231: memcpy(dest, src, 231); return;
	case 232: memcpy(dest, src, 232); return;
	case 233: memcpy(dest, src, 233); return;
	case 234: memcpy(dest, src, 234); return;
	case 235: memcpy(dest, src, 235); return;
	case 236: memcpy(dest, src, 236); return;
	case 237: memcpy(dest, src, 237); return;
	case 238: memcpy(dest, src, 238); return;
	case 239: memcpy(dest, src, 239); return;
	case 240: memcpy(dest, src, 240); return;
	case 241: memcpy(dest, src, 241); return;
	case 242: memcpy(dest, src, 242); return;
	case 243: memcpy(dest, src, 243); return;
	case 244: memcpy(dest, src, 244); return;
	case 245: memcpy(dest, src, 245); return;
	case 246: memcpy(dest, src, 246); return;
	case 247: memcpy(dest, src, 247); return;
	case 248: memcpy(dest, src, 248); return;
	case 249: memcpy(dest, src, 249); return;
	case 250: memcpy(dest, src, 250); return;
	case 251: memcpy(dest, src, 251); return;
	case 252: memcpy(dest, src, 252); return;
	case 253: memcpy(dest, src, 253); return;
	case 254: memcpy(dest, src, 254); return;
	case 255: memcpy(dest, src, 255); return;
	case 256: memcpy(dest, src, 256); return;
	default:
		// sizes above 256 have no dedicated case
		memcpy(dest, src, size);
	}
	// LCOV_EXCL_STOP
}

//! This templated memcmp is significantly faster than std::memcmp,
//! but only when you are calling memcmp with a const size in a loop.
For instance `while () { memcmp(, , const_size); ... }` static inline int FastMemcmp(const void *str1, const void *str2, const size_t size) { // LCOV_EXCL_START switch (size) { case 0: return 0; case 1: return MemcmpFixed<1>(str1, str2); case 2: return MemcmpFixed<2>(str1, str2); case 3: return MemcmpFixed<3>(str1, str2); case 4: return MemcmpFixed<4>(str1, str2); case 5: return MemcmpFixed<5>(str1, str2); case 6: return MemcmpFixed<6>(str1, str2); case 7: return MemcmpFixed<7>(str1, str2); case 8: return MemcmpFixed<8>(str1, str2); case 9: return MemcmpFixed<9>(str1, str2); case 10: return MemcmpFixed<10>(str1, str2); case 11: return MemcmpFixed<11>(str1, str2); case 12: return MemcmpFixed<12>(str1, str2); case 13: return MemcmpFixed<13>(str1, str2); case 14: return MemcmpFixed<14>(str1, str2); case 15: return MemcmpFixed<15>(str1, str2); case 16: return MemcmpFixed<16>(str1, str2); case 17: return MemcmpFixed<17>(str1, str2); case 18: return MemcmpFixed<18>(str1, str2); case 19: return MemcmpFixed<19>(str1, str2); case 20: return MemcmpFixed<20>(str1, str2); case 21: return MemcmpFixed<21>(str1, str2); case 22: return MemcmpFixed<22>(str1, str2); case 23: return MemcmpFixed<23>(str1, str2); case 24: return MemcmpFixed<24>(str1, str2); case 25: return MemcmpFixed<25>(str1, str2); case 26: return MemcmpFixed<26>(str1, str2); case 27: return MemcmpFixed<27>(str1, str2); case 28: return MemcmpFixed<28>(str1, str2); case 29: return MemcmpFixed<29>(str1, str2); case 30: return MemcmpFixed<30>(str1, str2); case 31: return MemcmpFixed<31>(str1, str2); case 32: return MemcmpFixed<32>(str1, str2); case 33: return MemcmpFixed<33>(str1, str2); case 34: return MemcmpFixed<34>(str1, str2); case 35: return MemcmpFixed<35>(str1, str2); case 36: return MemcmpFixed<36>(str1, str2); case 37: return MemcmpFixed<37>(str1, str2); case 38: return MemcmpFixed<38>(str1, str2); case 39: return MemcmpFixed<39>(str1, str2); case 40: return MemcmpFixed<40>(str1, str2); case 41: return 
MemcmpFixed<41>(str1, str2); case 42: return MemcmpFixed<42>(str1, str2); case 43: return MemcmpFixed<43>(str1, str2); case 44: return MemcmpFixed<44>(str1, str2); case 45: return MemcmpFixed<45>(str1, str2); case 46: return MemcmpFixed<46>(str1, str2); case 47: return MemcmpFixed<47>(str1, str2); case 48: return MemcmpFixed<48>(str1, str2); case 49: return MemcmpFixed<49>(str1, str2); case 50: return MemcmpFixed<50>(str1, str2); case 51: return MemcmpFixed<51>(str1, str2); case 52: return MemcmpFixed<52>(str1, str2); case 53: return MemcmpFixed<53>(str1, str2); case 54: return MemcmpFixed<54>(str1, str2); case 55: return MemcmpFixed<55>(str1, str2); case 56: return MemcmpFixed<56>(str1, str2); case 57: return MemcmpFixed<57>(str1, str2); case 58: return MemcmpFixed<58>(str1, str2); case 59: return MemcmpFixed<59>(str1, str2); case 60: return MemcmpFixed<60>(str1, str2); case 61: return MemcmpFixed<61>(str1, str2); case 62: return MemcmpFixed<62>(str1, str2); case 63: return MemcmpFixed<63>(str1, str2); case 64: return MemcmpFixed<64>(str1, str2); default: return memcmp(str1, str2, size); } // LCOV_EXCL_STOP } static inline void FastMemset(void *ptr, int value, size_t size) { // LCOV_EXCL_START switch (size) { case 0: return; case 1: return MemsetFixed<1>(ptr, value); case 2: return MemsetFixed<2>(ptr, value); case 3: return MemsetFixed<3>(ptr, value); case 4: return MemsetFixed<4>(ptr, value); case 5: return MemsetFixed<5>(ptr, value); case 6: return MemsetFixed<6>(ptr, value); case 7: return MemsetFixed<7>(ptr, value); case 8: return MemsetFixed<8>(ptr, value); case 9: return MemsetFixed<9>(ptr, value); case 10: return MemsetFixed<10>(ptr, value); case 11: return MemsetFixed<11>(ptr, value); case 12: return MemsetFixed<12>(ptr, value); case 13: return MemsetFixed<13>(ptr, value); case 14: return MemsetFixed<14>(ptr, value); case 15: return MemsetFixed<15>(ptr, value); case 16: return MemsetFixed<16>(ptr, value); case 17: return MemsetFixed<17>(ptr, value); case 
18: return MemsetFixed<18>(ptr, value); case 19: return MemsetFixed<19>(ptr, value); case 20: return MemsetFixed<20>(ptr, value); case 21: return MemsetFixed<21>(ptr, value); case 22: return MemsetFixed<22>(ptr, value); case 23: return MemsetFixed<23>(ptr, value); case 24: return MemsetFixed<24>(ptr, value); case 25: return MemsetFixed<25>(ptr, value); case 26: return MemsetFixed<26>(ptr, value); case 27: return MemsetFixed<27>(ptr, value); case 28: return MemsetFixed<28>(ptr, value); case 29: return MemsetFixed<29>(ptr, value); case 30: return MemsetFixed<30>(ptr, value); case 31: return MemsetFixed<31>(ptr, value); case 32: return MemsetFixed<32>(ptr, value); case 33: return MemsetFixed<33>(ptr, value); case 34: return MemsetFixed<34>(ptr, value); case 35: return MemsetFixed<35>(ptr, value); case 36: return MemsetFixed<36>(ptr, value); case 37: return MemsetFixed<37>(ptr, value); case 38: return MemsetFixed<38>(ptr, value); case 39: return MemsetFixed<39>(ptr, value); case 40: return MemsetFixed<40>(ptr, value); case 41: return MemsetFixed<41>(ptr, value); case 42: return MemsetFixed<42>(ptr, value); case 43: return MemsetFixed<43>(ptr, value); case 44: return MemsetFixed<44>(ptr, value); case 45: return MemsetFixed<45>(ptr, value); case 46: return MemsetFixed<46>(ptr, value); case 47: return MemsetFixed<47>(ptr, value); case 48: return MemsetFixed<48>(ptr, value); case 49: return MemsetFixed<49>(ptr, value); case 50: return MemsetFixed<50>(ptr, value); case 51: return MemsetFixed<51>(ptr, value); case 52: return MemsetFixed<52>(ptr, value); case 53: return MemsetFixed<53>(ptr, value); case 54: return MemsetFixed<54>(ptr, value); case 55: return MemsetFixed<55>(ptr, value); case 56: return MemsetFixed<56>(ptr, value); case 57: return MemsetFixed<57>(ptr, value); case 58: return MemsetFixed<58>(ptr, value); case 59: return MemsetFixed<59>(ptr, value); case 60: return MemsetFixed<60>(ptr, value); case 61: return MemsetFixed<61>(ptr, value); case 62: return 
MemsetFixed<62>(ptr, value); case 63: return MemsetFixed<63>(ptr, value); case 64: return MemsetFixed<64>(ptr, value); case 65: return MemsetFixed<65>(ptr, value); case 66: return MemsetFixed<66>(ptr, value); case 67: return MemsetFixed<67>(ptr, value); case 68: return MemsetFixed<68>(ptr, value); case 69: return MemsetFixed<69>(ptr, value); case 70: return MemsetFixed<70>(ptr, value); case 71: return MemsetFixed<71>(ptr, value); case 72: return MemsetFixed<72>(ptr, value); case 73: return MemsetFixed<73>(ptr, value); case 74: return MemsetFixed<74>(ptr, value); case 75: return MemsetFixed<75>(ptr, value); case 76: return MemsetFixed<76>(ptr, value); case 77: return MemsetFixed<77>(ptr, value); case 78: return MemsetFixed<78>(ptr, value); case 79: return MemsetFixed<79>(ptr, value); case 80: return MemsetFixed<80>(ptr, value); case 81: return MemsetFixed<81>(ptr, value); case 82: return MemsetFixed<82>(ptr, value); case 83: return MemsetFixed<83>(ptr, value); case 84: return MemsetFixed<84>(ptr, value); case 85: return MemsetFixed<85>(ptr, value); case 86: return MemsetFixed<86>(ptr, value); case 87: return MemsetFixed<87>(ptr, value); case 88: return MemsetFixed<88>(ptr, value); case 89: return MemsetFixed<89>(ptr, value); case 90: return MemsetFixed<90>(ptr, value); case 91: return MemsetFixed<91>(ptr, value); case 92: return MemsetFixed<92>(ptr, value); case 93: return MemsetFixed<93>(ptr, value); case 94: return MemsetFixed<94>(ptr, value); case 95: return MemsetFixed<95>(ptr, value); case 96: return MemsetFixed<96>(ptr, value); case 97: return MemsetFixed<97>(ptr, value); case 98: return MemsetFixed<98>(ptr, value); case 99: return MemsetFixed<99>(ptr, value); case 100: return MemsetFixed<100>(ptr, value); case 101: return MemsetFixed<101>(ptr, value); case 102: return MemsetFixed<102>(ptr, value); case 103: return MemsetFixed<103>(ptr, value); case 104: return MemsetFixed<104>(ptr, value); case 105: return MemsetFixed<105>(ptr, value); case 106: return 
MemsetFixed<106>(ptr, value); case 107: return MemsetFixed<107>(ptr, value); case 108: return MemsetFixed<108>(ptr, value); case 109: return MemsetFixed<109>(ptr, value); case 110: return MemsetFixed<110>(ptr, value); case 111: return MemsetFixed<111>(ptr, value); case 112: return MemsetFixed<112>(ptr, value); case 113: return MemsetFixed<113>(ptr, value); case 114: return MemsetFixed<114>(ptr, value); case 115: return MemsetFixed<115>(ptr, value); case 116: return MemsetFixed<116>(ptr, value); case 117: return MemsetFixed<117>(ptr, value); case 118: return MemsetFixed<118>(ptr, value); case 119: return MemsetFixed<119>(ptr, value); case 120: return MemsetFixed<120>(ptr, value); case 121: return MemsetFixed<121>(ptr, value); case 122: return MemsetFixed<122>(ptr, value); case 123: return MemsetFixed<123>(ptr, value); case 124: return MemsetFixed<124>(ptr, value); case 125: return MemsetFixed<125>(ptr, value); case 126: return MemsetFixed<126>(ptr, value); case 127: return MemsetFixed<127>(ptr, value); case 128: return MemsetFixed<128>(ptr, value); case 129: return MemsetFixed<129>(ptr, value); case 130: return MemsetFixed<130>(ptr, value); case 131: return MemsetFixed<131>(ptr, value); case 132: return MemsetFixed<132>(ptr, value); case 133: return MemsetFixed<133>(ptr, value); case 134: return MemsetFixed<134>(ptr, value); case 135: return MemsetFixed<135>(ptr, value); case 136: return MemsetFixed<136>(ptr, value); case 137: return MemsetFixed<137>(ptr, value); case 138: return MemsetFixed<138>(ptr, value); case 139: return MemsetFixed<139>(ptr, value); case 140: return MemsetFixed<140>(ptr, value); case 141: return MemsetFixed<141>(ptr, value); case 142: return MemsetFixed<142>(ptr, value); case 143: return MemsetFixed<143>(ptr, value); case 144: return MemsetFixed<144>(ptr, value); case 145: return MemsetFixed<145>(ptr, value); case 146: return MemsetFixed<146>(ptr, value); case 147: return MemsetFixed<147>(ptr, value); case 148: return MemsetFixed<148>(ptr, 
value); case 149: return MemsetFixed<149>(ptr, value); case 150: return MemsetFixed<150>(ptr, value); case 151: return MemsetFixed<151>(ptr, value); case 152: return MemsetFixed<152>(ptr, value); case 153: return MemsetFixed<153>(ptr, value); case 154: return MemsetFixed<154>(ptr, value); case 155: return MemsetFixed<155>(ptr, value); case 156: return MemsetFixed<156>(ptr, value); case 157: return MemsetFixed<157>(ptr, value); case 158: return MemsetFixed<158>(ptr, value); case 159: return MemsetFixed<159>(ptr, value); case 160: return MemsetFixed<160>(ptr, value); case 161: return MemsetFixed<161>(ptr, value); case 162: return MemsetFixed<162>(ptr, value); case 163: return MemsetFixed<163>(ptr, value); case 164: return MemsetFixed<164>(ptr, value); case 165: return MemsetFixed<165>(ptr, value); case 166: return MemsetFixed<166>(ptr, value); case 167: return MemsetFixed<167>(ptr, value); case 168: return MemsetFixed<168>(ptr, value); case 169: return MemsetFixed<169>(ptr, value); case 170: return MemsetFixed<170>(ptr, value); case 171: return MemsetFixed<171>(ptr, value); case 172: return MemsetFixed<172>(ptr, value); case 173: return MemsetFixed<173>(ptr, value); case 174: return MemsetFixed<174>(ptr, value); case 175: return MemsetFixed<175>(ptr, value); case 176: return MemsetFixed<176>(ptr, value); case 177: return MemsetFixed<177>(ptr, value); case 178: return MemsetFixed<178>(ptr, value); case 179: return MemsetFixed<179>(ptr, value); case 180: return MemsetFixed<180>(ptr, value); case 181: return MemsetFixed<181>(ptr, value); case 182: return MemsetFixed<182>(ptr, value); case 183: return MemsetFixed<183>(ptr, value); case 184: return MemsetFixed<184>(ptr, value); case 185: return MemsetFixed<185>(ptr, value); case 186: return MemsetFixed<186>(ptr, value); case 187: return MemsetFixed<187>(ptr, value); case 188: return MemsetFixed<188>(ptr, value); case 189: return MemsetFixed<189>(ptr, value); case 190: return MemsetFixed<190>(ptr, value); case 191: return 
MemsetFixed<191>(ptr, value); case 192: return MemsetFixed<192>(ptr, value); case 193: return MemsetFixed<193>(ptr, value); case 194: return MemsetFixed<194>(ptr, value); case 195: return MemsetFixed<195>(ptr, value); case 196: return MemsetFixed<196>(ptr, value); case 197: return MemsetFixed<197>(ptr, value); case 198: return MemsetFixed<198>(ptr, value); case 199: return MemsetFixed<199>(ptr, value); case 200: return MemsetFixed<200>(ptr, value); case 201: return MemsetFixed<201>(ptr, value); case 202: return MemsetFixed<202>(ptr, value); case 203: return MemsetFixed<203>(ptr, value); case 204: return MemsetFixed<204>(ptr, value); case 205: return MemsetFixed<205>(ptr, value); case 206: return MemsetFixed<206>(ptr, value); case 207: return MemsetFixed<207>(ptr, value); case 208: return MemsetFixed<208>(ptr, value); case 209: return MemsetFixed<209>(ptr, value); case 210: return MemsetFixed<210>(ptr, value); case 211: return MemsetFixed<211>(ptr, value); case 212: return MemsetFixed<212>(ptr, value); case 213: return MemsetFixed<213>(ptr, value); case 214: return MemsetFixed<214>(ptr, value); case 215: return MemsetFixed<215>(ptr, value); case 216: return MemsetFixed<216>(ptr, value); case 217: return MemsetFixed<217>(ptr, value); case 218: return MemsetFixed<218>(ptr, value); case 219: return MemsetFixed<219>(ptr, value); case 220: return MemsetFixed<220>(ptr, value); case 221: return MemsetFixed<221>(ptr, value); case 222: return MemsetFixed<222>(ptr, value); case 223: return MemsetFixed<223>(ptr, value); case 224: return MemsetFixed<224>(ptr, value); case 225: return MemsetFixed<225>(ptr, value); case 226: return MemsetFixed<226>(ptr, value); case 227: return MemsetFixed<227>(ptr, value); case 228: return MemsetFixed<228>(ptr, value); case 229: return MemsetFixed<229>(ptr, value); case 230: return MemsetFixed<230>(ptr, value); case 231: return MemsetFixed<231>(ptr, value); case 232: return MemsetFixed<232>(ptr, value); case 233: return MemsetFixed<233>(ptr, 
value); case 234: return MemsetFixed<234>(ptr, value); case 235: return MemsetFixed<235>(ptr, value); case 236: return MemsetFixed<236>(ptr, value); case 237: return MemsetFixed<237>(ptr, value); case 238: return MemsetFixed<238>(ptr, value); case 239: return MemsetFixed<239>(ptr, value); case 240: return MemsetFixed<240>(ptr, value); case 241: return MemsetFixed<241>(ptr, value); case 242: return MemsetFixed<242>(ptr, value); case 243: return MemsetFixed<243>(ptr, value); case 244: return MemsetFixed<244>(ptr, value); case 245: return MemsetFixed<245>(ptr, value); case 246: return MemsetFixed<246>(ptr, value); case 247: return MemsetFixed<247>(ptr, value); case 248: return MemsetFixed<248>(ptr, value); case 249: return MemsetFixed<249>(ptr, value); case 250: return MemsetFixed<250>(ptr, value); case 251: return MemsetFixed<251>(ptr, value); case 252: return MemsetFixed<252>(ptr, value); case 253: return MemsetFixed<253>(ptr, value); case 254: return MemsetFixed<254>(ptr, value); case 255: return MemsetFixed<255>(ptr, value); case 256: return MemsetFixed<256>(ptr, value); default: memset(ptr, value, size); } // LCOV_EXCL_STOP } } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/sort/comparators.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/row/row_layout.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/operator/aggregate/aggregate_object.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/expression_executor.hpp // // 
//===----------------------------------------------------------------------===//

// NOTE(review): in this copy of the file the template argument lists appear to have been stripped
// (e.g. `vector>`, bare `unique_ptr`, `template static`). The declarations below are kept token-for-token;
// restore the missing arguments from the original headers before compiling.
namespace duckdb {
class Allocator;
class ExecutionContext;

//! ExpressionExecutor is responsible for executing a set of expressions and storing the result in a data chunk
class ExpressionExecutor {
	friend class Index;
	friend class CreateIndexLocalSinkState;

public:
	DUCKDB_API explicit ExpressionExecutor(ClientContext &context);
	DUCKDB_API ExpressionExecutor(ClientContext &context, const Expression *expression);
	DUCKDB_API ExpressionExecutor(ClientContext &context, const Expression &expression);
	DUCKDB_API ExpressionExecutor(ClientContext &context, const vector> &expressions);
	//! Moving an executor is explicitly disabled
	ExpressionExecutor(ExpressionExecutor &&) = delete;

	//! The expressions of the executor
	vector expressions;
	//! The data chunk of the current physical operator, used to resolve
	//! column references and determines the output cardinality
	DataChunk *chunk = nullptr;

public:
	//! Accessors related to the (optional) client context and the allocator; only declared here
	bool HasContext();
	ClientContext &GetContext();
	Allocator &GetAllocator();

	//! Add an expression to the set of to-be-executed expressions of the executor
	DUCKDB_API void AddExpression(const Expression &expr);

	//! Execute the set of expressions with the given input chunk and store the result in the output chunk
	DUCKDB_API void Execute(DataChunk *input, DataChunk &result);
	//! Convenience overload: forwards the address of `input` to the pointer-taking Execute
	inline void Execute(DataChunk &input, DataChunk &result) {
		Execute(&input, result);
	}
	//! Convenience overload: forwards to the pointer-taking Execute with a null input chunk
	inline void Execute(DataChunk &result) {
		Execute(nullptr, result);
	}

	//! Execute the ExpressionExecutor and put the result in the result vector; this should only be used for expression
	//! executors with a single expression
	DUCKDB_API void ExecuteExpression(DataChunk &input, Vector &result);
	//! Execute the ExpressionExecutor and put the result in the result vector; this should only be used for expression
	//! executors with a single expression
	DUCKDB_API void ExecuteExpression(Vector &result);
	//! Execute the ExpressionExecutor and generate a selection vector from all true values in the result; this should
	//! only be used with a single boolean expression
	DUCKDB_API idx_t SelectExpression(DataChunk &input, SelectionVector &sel);

	//! Execute the expression with index `expr_idx` and store the result in the result vector
	DUCKDB_API void ExecuteExpression(idx_t expr_idx, Vector &result);
	//! Evaluate a scalar expression and fold it into a single value
	DUCKDB_API static Value EvaluateScalar(ClientContext &context, const Expression &expr,
	                                       bool allow_unfoldable = false);
	//! Try to evaluate a scalar expression and fold it into a single value, returns false if an exception is thrown
	DUCKDB_API static bool TryEvaluateScalar(ClientContext &context, const Expression &expr, Value &result);

	//! Initialize the state of a given expression
	static unique_ptr InitializeState(const Expression &expr, ExpressionExecutorState &state);

	//! Set the chunk member (see `chunk` above) from a pointer
	inline void SetChunk(DataChunk *chunk) {
		this->chunk = chunk;
	}
	//! Set the chunk member from a reference; forwards to the pointer overload
	inline void SetChunk(DataChunk &chunk) {
		SetChunk(&chunk);
	}

	//! NOTE(review): presumably returns the `states` member declared below - confirm against the implementation
	DUCKDB_API vector> &GetStates();

protected:
	void Initialize(const Expression &expr, ExpressionExecutorState &state);

	//! InitializeState overloads, one per bound expression class
	static unique_ptr InitializeState(const BoundReferenceExpression &expr, ExpressionExecutorState &state);
	static unique_ptr InitializeState(const BoundBetweenExpression &expr, ExpressionExecutorState &state);
	static unique_ptr InitializeState(const BoundCaseExpression &expr, ExpressionExecutorState &state);
	static unique_ptr InitializeState(const BoundCastExpression &expr, ExpressionExecutorState &state);
	static unique_ptr InitializeState(const BoundComparisonExpression &expr, ExpressionExecutorState &state);
	static unique_ptr InitializeState(const BoundConjunctionExpression &expr, ExpressionExecutorState &state);
	static unique_ptr InitializeState(const BoundConstantExpression &expr, ExpressionExecutorState &state);
	static unique_ptr InitializeState(const BoundFunctionExpression &expr, ExpressionExecutorState &state);
	static unique_ptr InitializeState(const BoundOperatorExpression &expr, ExpressionExecutorState &state);
	static unique_ptr InitializeState(const BoundParameterExpression &expr, ExpressionExecutorState &state);

	//! Execute overloads, one per bound expression class; each takes the expression state, an optional selection
	//! vector pointer, a count and the result vector
	void Execute(const Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count,
	             Vector &result);

	void Execute(const BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count,
	             Vector &result);
	void Execute(const BoundCaseExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count,
	             Vector &result);
	void Execute(const BoundCastExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count,
	             Vector &result);

	void Execute(const BoundComparisonExpression &expr, ExpressionState *state, const SelectionVector *sel,
	             idx_t count, Vector &result);
	void Execute(const BoundConjunctionExpression &expr, ExpressionState *state, const SelectionVector *sel,
	             idx_t count, Vector &result);
	void Execute(const BoundConstantExpression &expr, ExpressionState *state, const SelectionVector *sel,
	             idx_t count, Vector &result);
	void Execute(const BoundFunctionExpression &expr, ExpressionState *state, const SelectionVector *sel,
	             idx_t count, Vector &result);
	void Execute(const BoundOperatorExpression &expr, ExpressionState *state, const SelectionVector *sel,
	             idx_t count, Vector &result);
	void Execute(const BoundParameterExpression &expr, ExpressionState *state, const SelectionVector *sel,
	             idx_t count, Vector &result);
	void Execute(const BoundReferenceExpression &expr, ExpressionState *state, const SelectionVector *sel,
	             idx_t count, Vector &result);

	//! Execute the (boolean-returning) expression and generate a selection vector with all entries that are "true" in
	//! the result
	idx_t Select(const Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count,
	             SelectionVector *true_sel, SelectionVector *false_sel);
	idx_t DefaultSelect(const Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count,
	                    SelectionVector *true_sel, SelectionVector *false_sel);

	idx_t Select(const BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count,
	             SelectionVector *true_sel, SelectionVector *false_sel);
	idx_t Select(const BoundComparisonExpression &expr, ExpressionState *state, const SelectionVector *sel,
	             idx_t count, SelectionVector *true_sel, SelectionVector *false_sel);
	idx_t Select(const BoundConjunctionExpression &expr, ExpressionState *state, const SelectionVector *sel,
	             idx_t count, SelectionVector *true_sel, SelectionVector *false_sel);

	//! Verify that the output of a step in the ExpressionExecutor is correct
	void Verify(const Expression &expr, Vector &result, idx_t count);

	void FillSwitch(Vector &vector, Vector &result, const SelectionVector &sel, sel_t count);

private:
	//! Client context
	optional_ptr context;
	//! The states of the expression executor; this holds any intermediates and temporary states of expressions
	vector> states;

private:
	// it is possible to create an expression executor without a ClientContext - but it should be avoided
	DUCKDB_API ExpressionExecutor();
	DUCKDB_API ExpressionExecutor(const vector> &exprs);
};
} // namespace duckdb

namespace duckdb {

class BoundAggregateExpression;
class BoundWindowExpression;

//! Holder for a function's bind data, stored in a unique_ptr member
struct FunctionDataWrapper {
	//! Moves the passed-in pointer into function_data
	FunctionDataWrapper(unique_ptr function_data_p) : function_data(std::move(function_data_p)) {
	}
	unique_ptr function_data;
};

//! Bundles an aggregate function with its bind data, child/payload sizes, aggregate type, return type and filter
struct AggregateObject {
	AggregateObject(AggregateFunction function, FunctionData *bind_data, idx_t child_count, idx_t payload_size,
	                AggregateType aggr_type, PhysicalType return_type, Expression *filter = nullptr);
	explicit AggregateObject(BoundAggregateExpression *aggr);
	explicit AggregateObject(BoundWindowExpression &window);

	//! Returns the wrapped function data, or nullptr when no wrapper is set
	FunctionData *GetFunctionData() const {
		return bind_data_wrapper ? bind_data_wrapper->function_data.get() : nullptr;
	}

	AggregateFunction function;
	shared_ptr bind_data_wrapper;
	idx_t child_count;
	idx_t payload_size;
	AggregateType aggr_type;
	PhysicalType return_type;
	Expression *filter = nullptr;

public:
	//! True when the aggregate type is AggregateType::DISTINCT
	bool IsDistinct() const {
		return aggr_type == AggregateType::DISTINCT;
	}
	static vector CreateAggregateObjects(const vector &bindings);
};

//! Per-aggregate filter state: an expression executor plus the filtered payload chunk and a selection vector
struct AggregateFilterData {
	AggregateFilterData(ClientContext &context, Expression &filter_expr, const vector &payload_types);

	idx_t ApplyFilter(DataChunk &payload);

	ExpressionExecutor filter_executor;
	DataChunk filtered_payload;
	SelectionVector true_sel;
};

//! A collection of AggregateFilterData, looked up by aggregate index
struct AggregateFilterDataSet {
	AggregateFilterDataSet();

	vector> filter_data;

public:
	void Initialize(ClientContext &context, const vector &aggregates, const vector &payload_types);

	AggregateFilterData &GetFilterData(idx_t aggr_idx);
};

} // namespace duckdb

namespace duckdb {

//! Describes the layout of a row: validity header, data columns and aggregate states, with their widths and offsets
class RowLayout {
public:
	friend class TupleDataLayout;
	using Aggregates = vector;
	using ValidityBytes = TemplatedValidityMask;

	//! Creates an empty RowLayout
	RowLayout();

public:
	//! Initializes the RowLayout with the specified types and aggregates to an empty RowLayout
	void Initialize(vector types_p, Aggregates aggregates_p, bool align = true);
	//! Initializes the RowLayout with the specified types to an empty RowLayout
	void Initialize(vector types, bool align = true);
	//! Initializes the RowLayout with the specified aggregates to an empty RowLayout
	void Initialize(Aggregates aggregates_p, bool align = true);
	//! Returns the number of data columns
	inline idx_t ColumnCount() const {
		return types.size();
	}
	//! Returns a list of the column types for this data chunk
	inline const vector &GetTypes() const {
		return types;
	}
	//! Returns the number of aggregates
	inline idx_t AggregateCount() const {
		return aggregates.size();
	}
	//! Returns a list of the aggregates for this data chunk
	inline Aggregates &GetAggregates() {
		return aggregates;
	}
	//! Returns the total width required for each row, including padding
	inline idx_t GetRowWidth() const {
		return row_width;
	}
	//! Returns the offset to the start of the data
	inline idx_t GetDataOffset() const {
		return flag_width;
	}
	//! Returns the total width required for the data, including padding
	inline idx_t GetDataWidth() const {
		return data_width;
	}
	//! Returns the offset to the start of the aggregates
	inline idx_t GetAggrOffset() const {
		return flag_width + data_width;
	}
	//! Returns the total width required for the aggregates, including padding
	inline idx_t GetAggrWidth() const {
		return aggr_width;
	}
	//! Returns the column offsets into each row
	inline const vector &GetOffsets() const {
		return offsets;
	}
	//! Returns whether all columns in this layout are constant size
	inline bool AllConstant() const {
		return all_constant;
	}
	//! Returns the offset to the pointer to the heap for each row
	inline idx_t GetHeapOffset() const {
		return heap_pointer_offset;
	}

private:
	//! The types of the data columns
	vector types;
	//! The aggregate functions
	Aggregates aggregates;
	//! The width of the validity header
	idx_t flag_width;
	//! The width of the data portion
	idx_t data_width;
	//! The width of the aggregate state portion
	idx_t aggr_width;
	//! The width of the entire row
	idx_t row_width;
	//! The offsets to the columns and aggregate data in each row
	vector offsets;
	//! Whether all columns in this layout are constant size
	bool all_constant;
	//! Offset to the pointer to the heap for each row
	idx_t heap_pointer_offset;
};

} // namespace duckdb

namespace duckdb {

struct SortLayout;
struct SBScanState;

using ValidityBytes = RowLayout::ValidityBytes;

//! Static comparison helpers used when sorting rows (all members are static)
struct Comparators {
public:
	//! Whether a tie between two blobs can be broken
	static bool TieIsBreakable(const idx_t &col_idx, const data_ptr_t &row_ptr, const SortLayout &sort_layout);
	//! Compares the tuples that are being read from the 'left' and 'right' blocks during merge sort
	//! (only in case we cannot simply 'memcmp' - if there are blob columns)
	static int CompareTuple(const SBScanState &left, const SBScanState &right, const data_ptr_t &l_ptr,
	                        const data_ptr_t &r_ptr, const SortLayout &sort_layout, const bool &external_sort);
	//! Compare two blob values
	static int CompareVal(const data_ptr_t l_ptr, const data_ptr_t r_ptr, const LogicalType &type);

private:
	//! Compares two blob values that were initially tied by their prefix
	static int BreakBlobTie(const idx_t &tie_col, const SBScanState &left, const SBScanState &right,
	                        const SortLayout &sort_layout, const bool &external);
	//! Compare two fixed-size values
	template
	static int TemplatedCompareVal(const data_ptr_t &left_ptr, const data_ptr_t &right_ptr);

	//! Compare two values at the pointers (can be recursive if nested type)
	static int CompareValAndAdvance(data_ptr_t &l_ptr, data_ptr_t &r_ptr, const LogicalType &type, bool valid);
	//! Compares two fixed-size values at the given pointers
	template
	static int TemplatedCompareAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr);
	//! Compares two string values at the given pointers
	static int CompareStringAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr, bool valid);
	//! Compares two struct values at the given pointers (recursive)
	static int CompareStructAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr, const child_list_t &types,
	                                   bool valid);
	//! Compare two list values at the pointers (can be recursive if nested type)
	static int CompareListAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr, const LogicalType &type,
	                                 bool valid);
	//! Compares a list of fixed-size values
	template
	static int TemplatedCompareListLoop(data_ptr_t &left_ptr, data_ptr_t &right_ptr,
	                                    const ValidityBytes &left_validity, const ValidityBytes &right_validity,
	                                    const idx_t &count);

	//! Unswizzles an offset into a pointer
	static void UnswizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type);
	//! Swizzles a pointer into an offset
	static void SwizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type);
};

} // namespace duckdb
//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/common/types/row/row_data_collection_scanner.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

class BufferHandle;
class RowDataCollection;
struct RowDataBlock;
class DataChunk;

//! Used to scan the data into DataChunks after sorting
struct RowDataCollectionScanner {
public:
	using Types = vector;

	//! Read position (block and entry index) plus the buffer handles held while scanning
	struct ScanState {
		//! Starts at block 0, entry 0 of the given scanner
		explicit ScanState(const RowDataCollectionScanner &scanner_p)
		    : scanner(scanner_p), block_idx(0), entry_idx(0) {
		}

		void PinData();

		//! The scanner this state was created for
		const RowDataCollectionScanner &scanner;

		idx_t block_idx;
		idx_t entry_idx;

		BufferHandle data_handle;
		BufferHandle heap_handle;

		// We must pin ALL blocks we are going to gather from
		vector pinned_blocks;
	};

	//! Ensure that heap blocks correspond to row blocks
	static void AlignHeapBlocks(RowDataCollection &dst_block_collection, RowDataCollection &dst_string_heap,
	                            RowDataCollection &src_block_collection, RowDataCollection &src_string_heap,
	                            const RowLayout &layout);

	RowDataCollectionScanner(RowDataCollection &rows, RowDataCollection &heap, const RowLayout &layout,
	                         bool external, bool flush = true);

	//! The type layout of the payload
	inline const vector &GetTypes() const {
		return layout.GetTypes();
	}

	//! The number of rows in the collection
	inline idx_t Count() const {
		return total_count;
	}

	//! The number of rows scanned so far
	inline idx_t Scanned() const {
		return total_scanned;
	}

	//! The number of remaining rows
	inline idx_t Remaining() const {
		return total_count - total_scanned;
	}

	//! Swizzle the blocks for external scanning
	//! Swizzling is all or nothing, so if we have scanned previously,
	//! we need to re-swizzle.
	void ReSwizzle();

	void SwizzleBlock(RowDataBlock &data_block, RowDataBlock &heap_block);

	//! Scans the next data chunk from the sorted data
	void Scan(DataChunk &chunk);

	//! Resets to the start and updates the flush flag
	void Reset(bool flush = true);

private:
	//! The row data being scanned
	RowDataCollection &rows;
	//! The row heap being scanned
	RowDataCollection &heap;
	//! The data layout
	const RowLayout layout;
	//! Read state
	ScanState read_state;
	//! The total count of sorted_data
	const idx_t total_count;
	//! The number of rows scanned so far
	idx_t total_scanned;
	//! Addresses used to gather from the sorted data
	Vector addresses = Vector(LogicalType::POINTER);
	//! Whether the blocks can be flushed to disk
	const bool external;
	//! Whether to flush the blocks after scanning
	bool flush;
	//! Whether we are unswizzling the blocks
	const bool unswizzling;

	//! Checks that the newest block is valid
	void ValidateUnscannedBlock() const;
};

} // namespace duckdb

namespace duckdb {

class BufferManager;
struct RowDataBlock;
struct SortLayout;
struct GlobalSortState;

//! Tag passed to SortedData's constructor and stored in its `type` member
enum class SortedDataType { BLOB, PAYLOAD };

//! Object that holds sorted rows, and an accompanying heap if there are blobs
struct SortedData {
public:
	SortedData(SortedDataType type, const RowLayout &layout, BufferManager &buffer_manager, GlobalSortState &state);
	//! Number of rows that this object holds
	idx_t Count();
	//! Initialize new block to write to
	void CreateBlock();
	//! Create a slice that holds the rows between the start and end indices
	unique_ptr CreateSlice(idx_t start_block_index, idx_t end_block_index, idx_t end_entry_index);
	//! Unswizzles all
	void Unswizzle();

public:
	const SortedDataType type;
	//! Layout of this data
	const RowLayout layout;
	//! Data and heap blocks
	vector> data_blocks;
	vector> heap_blocks;
	//! Whether the pointers in this sorted data are swizzled
	bool swizzled;

private:
	//! The buffer manager
	BufferManager &buffer_manager;
	//! The global state
	GlobalSortState &state;
};

//! Block that holds sorted rows: radix, blob and payload data
struct SortedBlock {
public:
	SortedBlock(BufferManager &buffer_manager, GlobalSortState &gstate);
	//! Number of rows that this object holds
	idx_t Count() const;
	//! Initialize this block to write data to
	void InitializeWrite();
	//! Init new block to write to
	void CreateBlock();
	//! Fill this sorted block by appending the blocks held by a vector of sorted blocks
	void AppendSortedBlocks(vector> &sorted_blocks);
	//! Locate the block and entry index of a row in this block,
	//! given an index between 0 and the total number of rows in this block
	void GlobalToLocalIndex(const idx_t &global_idx, idx_t &local_block_index, idx_t &local_entry_index);
	//! Create a slice that holds the rows between the start and end indices
	unique_ptr CreateSlice(const idx_t start, const idx_t end, idx_t &entry_idx);

	//! Size (in bytes) of the heap of this block
	idx_t HeapSize() const;
	//! Total size (in bytes) of this block
	idx_t SizeInBytes() const;

public:
	//! Radix/memcmp sortable data
	vector> radix_sorting_data;
	//! Variable sized sorting data
	unique_ptr blob_sorting_data;
	//! Payload data
	unique_ptr payload_data;

private:
	//! Buffer manager, global state, and sorting layout constants
	BufferManager &buffer_manager;
	GlobalSortState &state;
	const SortLayout &sort_layout;
	const RowLayout &payload_layout;
};

//! State used to scan a SortedBlock e.g. during merge sort
struct SBScanState {
public:
	SBScanState(BufferManager &buffer_manager, GlobalSortState &state);

	void PinRadix(idx_t block_idx_to);
	void PinData(SortedData &sd);

	data_ptr_t RadixPtr() const;
	data_ptr_t DataPtr(SortedData &sd) const;
	data_ptr_t HeapPtr(SortedData &sd) const;
	data_ptr_t BaseHeapPtr(SortedData &sd) const;

	idx_t Remaining() const;

	void SetIndices(idx_t block_idx_to, idx_t entry_idx_to);

public:
	BufferManager &buffer_manager;
	const SortLayout &sort_layout;
	GlobalSortState &state;

	//! The sorted block being scanned (raw pointer member)
	SortedBlock *sb;

	//! Current block and entry position
	idx_t block_idx;
	idx_t entry_idx;

	//! Buffer handles for the radix, blob sorting (data + heap) and payload (data + heap) blocks
	BufferHandle radix_handle;

	BufferHandle blob_sorting_data_handle;
	BufferHandle blob_sorting_heap_handle;

	BufferHandle payload_data_handle;
	BufferHandle payload_heap_handle;
};

//! Used to scan the data into DataChunks after sorting
struct PayloadScanner {
public:
	PayloadScanner(SortedData &sorted_data, GlobalSortState &global_sort_state, bool flush = true);
	explicit PayloadScanner(GlobalSortState &global_sort_state, bool flush = true);

	//! Scan a single block
	PayloadScanner(GlobalSortState &global_sort_state, idx_t block_idx, bool flush = false);

	//! The type layout of the payload
	inline const vector &GetPayloadTypes() const {
		return scanner->GetTypes();
	}

	//! The number of rows scanned so far
	inline idx_t Scanned() const {
		return scanner->Scanned();
	}

	//! The number of remaining rows
	inline idx_t Remaining() const {
		return scanner->Remaining();
	}

	//! Scans the next data chunk from the sorted data
	void Scan(DataChunk &chunk);

private:
	//! The sorted data being scanned
	unique_ptr rows;
	unique_ptr heap;
	//! The actual scanner
	unique_ptr scanner;
};

//! Iterator over the entries of a sorted block, addressed by a global entry index.
//! Keeps a pointer to the current entry and can compare its entry against another iterator's entry.
struct SBIterator {
	static int ComparisonValue(ExpressionType comparison);

	SBIterator(GlobalSortState &gss, ExpressionType comparison, idx_t entry_idx_p = 0);

	//! Returns the current global entry index
	inline idx_t GetIndex() const {
		return entry_idx;
	}

	//! Repositions the iterator at global entry index `entry_idx_p`
	inline void SetIndex(idx_t entry_idx_p) {
		const auto new_block_idx = entry_idx_p / block_capacity;
		if (new_block_idx != scan.block_idx) {
			// Moving to a different block: reset the scan position to the start of that block
			scan.SetIndices(new_block_idx, 0);
			// Only pin when the target block exists; otherwise block_ptr keeps its previous value.
			// NOTE(review): in that case entry_ptr below is derived from the old block_ptr - presumably the
			// iterator is then past the end and must not be dereferenced; confirm with callers
			if (new_block_idx < block_count) {
				// presumably scan.block_idx == new_block_idx after SetIndices - TODO confirm
				scan.PinRadix(scan.block_idx);
				block_ptr = scan.RadixPtr();
				if (!all_constant) {
					// Not all-constant: the blob sorting data is pinned as well
					scan.PinData(*scan.sb->blob_sorting_data);
				}
			}
		}

		// Position within the block, and the matching entry pointer
		scan.entry_idx = entry_idx_p % block_capacity;
		entry_ptr = block_ptr + scan.entry_idx * entry_size;
		entry_idx = entry_idx_p;
	}

	//! Advances to the next entry
	inline SBIterator &operator++() {
		if (++scan.entry_idx < block_capacity) {
			// Still inside the current block: just bump the pointer and the global index
			entry_ptr += entry_size;
			++entry_idx;
		} else {
			// Crossed the block boundary: SetIndex recomputes scan.entry_idx and the pointers
			SetIndex(entry_idx + 1);
		}

		return *this;
	}

	//! Steps back to the previous entry
	inline SBIterator &operator--() {
		if (scan.entry_idx) {
			// Not at the first entry of the block: step back in place
			--scan.entry_idx;
			--entry_idx;
			entry_ptr -= entry_size;
		} else {
			// At the first entry of a block: reposition via SetIndex.
			// NOTE(review): if entry_idx is 0 here, entry_idx - 1 wraps around assuming idx_t is unsigned - confirm
			// that callers never decrement at index 0 or rely on the wrapped value deliberately
			SetIndex(entry_idx - 1);
		}

		return *this;
	}

	//! Compares this iterator's entry with `other`'s entry and returns whether the result is <= `cmp`.
	//! Uses FastMemcmp over cmp_size bytes when all_constant is set, Comparators::CompareTuple otherwise.
	inline bool Compare(const SBIterator &other) const {
		int comp_res;
		if (all_constant) {
			comp_res = FastMemcmp(entry_ptr, other.entry_ptr, cmp_size);
		} else {
			comp_res = Comparators::CompareTuple(scan, other.scan, entry_ptr, other.entry_ptr, sort_layout, external);
		}

		return comp_res <= cmp;
	}

	// Fixed comparison parameters
	const SortLayout &sort_layout;
	const idx_t block_count;
	const idx_t block_capacity;
	const size_t cmp_size;
	const size_t entry_size;
	const bool all_constant;
	const bool external;
	const int cmp;

	// Iteration state
	SBScanState scan;
	idx_t entry_idx;
	data_ptr_t block_ptr;
	data_ptr_t entry_ptr;
};

} // namespace duckdb
//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/common/types/row/row_data_collection.hpp
//
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/storage/buffer_manager.hpp
//
//
//===----------------------------------------------------------------------===//

// NOTE(review): in this copy of the file the template argument lists appear to have been stripped
// (e.g. `shared_ptr &handle`, `vector> blocks`, `make_uniq(entry_size)`). The declarations below are kept
// token-for-token; restore the missing arguments from the original headers before compiling.
namespace duckdb {

//! Path and size of a temporary file
struct TemporaryFileInformation {
	string path;
	idx_t size;
};

} // namespace duckdb

namespace duckdb {
class Allocator;
class BufferPool;

//! Abstract interface of the buffer manager: allocation, pinning/unpinning of blocks, memory accounting and
//! temporary-file handling. Allocate, ReAllocate, Pin, Unpin, GetUsedMemory, GetMaxMemory and PurgeQueue are pure
//! virtual; the other virtual methods are only declared here.
class BufferManager {
	friend class BufferHandle;
	friend class BlockHandle;
	friend class BlockManager;

public:
	BufferManager() {
	}
	virtual ~BufferManager() {
	}

public:
	static unique_ptr CreateStandardBufferManager(DatabaseInstance &db, DBConfig &config);
	virtual BufferHandle Allocate(idx_t block_size, bool can_destroy = true, shared_ptr *block = nullptr) = 0;
	//! Reallocate an in-memory buffer that is pinned.
	virtual void ReAllocate(shared_ptr &handle, idx_t block_size) = 0;
	virtual BufferHandle Pin(shared_ptr &handle) = 0;
	virtual void Unpin(shared_ptr &handle) = 0;
	//! Returns the currently allocated memory
	virtual idx_t GetUsedMemory() const = 0;
	//! Returns the maximum available memory
	virtual idx_t GetMaxMemory() const = 0;
	virtual shared_ptr RegisterSmallMemory(idx_t block_size);
	virtual DUCKDB_API Allocator &GetBufferAllocator();
	virtual DUCKDB_API void ReserveMemory(idx_t size);
	virtual DUCKDB_API void FreeReservedMemory(idx_t size);
	//! Set a new memory limit to the buffer manager, throws an exception if the new limit is too low and not enough
	//! blocks can be evicted
	virtual void SetLimit(idx_t limit = (idx_t)-1);
	virtual vector GetTemporaryFiles();
	virtual const string &GetTemporaryDirectory();
	virtual void SetTemporaryDirectory(const string &new_dir);
	virtual DatabaseInstance &GetDatabase();
	virtual bool HasTemporaryDirectory() const;
	//! Construct a managed buffer.
	virtual unique_ptr ConstructManagedBuffer(idx_t size, unique_ptr &&source,
	                                          FileBufferType type = FileBufferType::MANAGED_BUFFER);
	//! Get the underlying buffer pool responsible for managing the buffers
	virtual BufferPool &GetBufferPool();

	// Static methods
	DUCKDB_API static BufferManager &GetBufferManager(DatabaseInstance &db);
	DUCKDB_API static BufferManager &GetBufferManager(ClientContext &context);
	DUCKDB_API static BufferManager &GetBufferManager(AttachedDatabase &db);

	//! Returns the aligned allocation size for a block: the block size plus Storage::BLOCK_HEADER_SIZE,
	//! passed through AlignValue
	static idx_t GetAllocSize(idx_t block_size) {
		return AlignValue(block_size + Storage::BLOCK_HEADER_SIZE);
	}

protected:
	virtual void PurgeQueue() = 0;
	virtual void AddToEvictionQueue(shared_ptr &handle);
	virtual void WriteTemporaryBuffer(block_id_t block_id, FileBuffer &buffer);
	virtual unique_ptr ReadTemporaryBuffer(block_id_t id, unique_ptr buffer);
	virtual void DeleteTemporaryFile(block_id_t id);
};

} // namespace duckdb
//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/storage/buffer/block_handle.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
class BlockManager;
class BufferHandle;
class BufferPool;
class DatabaseInstance;

//! Load state of a block
enum class BlockState : uint8_t { BLOCK_UNLOADED = 0, BLOCK_LOADED = 1 };

//! A memory reservation of `size` bytes against a BufferPool. Non-copyable; movable.
struct BufferPoolReservation {
	idx_t size {0};
	BufferPool &pool;

	BufferPoolReservation(BufferPool &pool);
	BufferPoolReservation(const BufferPoolReservation &) = delete;
	BufferPoolReservation &operator=(const BufferPoolReservation &) = delete;

	BufferPoolReservation(BufferPoolReservation &&) noexcept;
	BufferPoolReservation &operator=(BufferPoolReservation &&) noexcept;

	~BufferPoolReservation();

	void Resize(idx_t new_size);
	void Merge(BufferPoolReservation &&src);
};

//! Scoped reservation: resizes to `size` on construction and back to 0 on destruction
struct TempBufferPoolReservation : BufferPoolReservation {
	TempBufferPoolReservation(BufferPool &pool, idx_t size) : BufferPoolReservation(pool) {
		Resize(size);
	}
	TempBufferPoolReservation(TempBufferPoolReservation &&) = default;
	~TempBufferPoolReservation() {
		Resize(0);
	}
};

//! Handle to a single block: tracks its id, load state, reader count, buffer and memory usage
class BlockHandle {
	friend class BlockManager;
	friend struct BufferEvictionNode;
	friend class BufferHandle;
	friend class BufferManager;
	friend class StandardBufferManager;
	friend class BufferPool;

public:
	BlockHandle(BlockManager &block_manager, block_id_t block_id);
	BlockHandle(BlockManager &block_manager, block_id_t block_id, unique_ptr buffer, bool can_destroy,
	            idx_t block_size, BufferPoolReservation &&reservation);
	~BlockHandle();

	BlockManager &block_manager;

public:
	//! Returns the block id
	block_id_t BlockId() {
		return block_id;
	}

	//! Resizes the loaded buffer to `block_size` and adjusts memory_usage by `memory_delta`.
	//! Requires a loaded buffer; afterwards memory_usage must equal the buffer's AllocSize() (both asserted).
	void ResizeBuffer(idx_t block_size, int64_t memory_delta) {
		D_ASSERT(buffer);
		// resize and adjust current memory
		buffer->Resize(block_size);
		memory_usage += memory_delta;
		D_ASSERT(memory_usage == buffer->AllocSize());
	}

	//! Returns the current reader count
	int32_t Readers() const {
		return readers;
	}

	//! True when the `unswizzled` pointer is null
	inline bool IsSwizzled() const {
		return !unswizzled;
	}

	//! Stores `unswizzler` in `unswizzled`; passing nullptr makes IsSwizzled() return true
	inline void SetSwizzling(const char *unswizzler) {
		unswizzled = unswizzler;
	}

	inline void SetCanDestroy(bool can_destroy_p) {
		can_destroy = can_destroy_p;
	}

	//! Returns a const reference to the memory usage of the block
	inline const idx_t &GetMemoryUsage() const {
		return memory_usage;
	}

private:
	static BufferHandle Load(shared_ptr &handle, unique_ptr buffer = nullptr);
	unique_ptr UnloadAndTakeBlock();
	void Unload();
	bool CanUnload();

	//! The block-level lock
	mutex lock;
	//! Whether or not the block is loaded/unloaded
	atomic state;
	//! Amount of concurrent readers
	atomic readers;
	//! The block id of the block
	const block_id_t block_id;
	//! Pointer to loaded data (if any)
	unique_ptr buffer;
	//! Internal eviction timestamp
	atomic eviction_timestamp;
	//! Whether or not the buffer can be destroyed (only used for temporary buffers)
	bool can_destroy;
	//! The memory usage of the block (when loaded). If we are pinning/loading
	//! an unloaded block, this tells us how much memory to reserve.
	idx_t memory_usage;
	//! Current memory reservation / usage
	BufferPoolReservation memory_charge;
	//! Does the block contain any memory pointers?
	const char *unswizzled;
};

} // namespace duckdb

namespace duckdb {

//! A buffer-managed block of row data together with its capacity, entry size, entry count and write offset
struct RowDataBlock {
public:
	//! Allocates a block of at least Storage::BLOCK_SIZE bytes (or capacity * entry_size if that is larger).
	//! The allocation is made with can_destroy = false and the BufferHandle returned by Allocate is discarded;
	//! only the block handle written to `block` is kept.
	RowDataBlock(BufferManager &buffer_manager, idx_t capacity, idx_t entry_size)
	    : capacity(capacity), entry_size(entry_size), count(0), byte_offset(0) {
		idx_t size = MaxValue(Storage::BLOCK_SIZE, capacity * entry_size);
		buffer_manager.Allocate(size, false, &block);
		D_ASSERT(BufferManager::GetAllocSize(size) == block->GetMemoryUsage());
	}
	//! Creates a block descriptor without allocating; used by Copy() below.
	//! NOTE(review): capacity, count and byte_offset are left uninitialized by this constructor - the caller
	//! must set them before use (Copy() does)
	explicit RowDataBlock(idx_t entry_size) : entry_size(entry_size) {
	}
	//! The buffer block handle
	shared_ptr block;
	//! Capacity (number of entries) and entry size that fit in this block
	idx_t capacity;
	const idx_t entry_size;
	//! Number of entries currently in this block
	idx_t count;
	//! Write offset (if variable size entries)
	idx_t byte_offset;

private:
	//! Implicit copying is not allowed
	RowDataBlock(const RowDataBlock &) = delete;

public:
	//! Explicit copy: the result shares the same block handle and copies capacity, count and byte_offset
	unique_ptr Copy() {
		auto result = make_uniq(entry_size);
		result->block = block;
		result->capacity = capacity;
		result->count = count;
		result->byte_offset = byte_offset;
		return result;
	}
};

//! A base pointer plus an entry count, describing one append into a block
struct BlockAppendEntry {
	BlockAppendEntry(data_ptr_t baseptr, idx_t count) : baseptr(baseptr), count(count) {
	}
	data_ptr_t baseptr;
	idx_t count;
};

//! A collection of RowDataBlocks with a fixed block capacity and entry size
class RowDataCollection {
public:
	RowDataCollection(BufferManager &buffer_manager, idx_t block_capacity, idx_t entry_size, bool keep_pinned = false);

	//! Creates a new collection with the same buffer manager, block capacity and entry size (no blocks are copied)
	unique_ptr CloneEmpty(bool keep_pinned = false) const {
		return make_uniq(buffer_manager, block_capacity, entry_size, keep_pinned);
	}

	//! BufferManager
	BufferManager &buffer_manager;
	//! The total number of stored entries
	idx_t count;
	//! The number of entries per block
	idx_t block_capacity;
	//! Size of entries in the blocks
	idx_t entry_size;
	//! The blocks holding the main data
	vector> blocks;
	//! The blocks that this collection currently has pinned
	vector pinned_blocks;
	//! Whether the blocks should stay pinned (necessary for e.g. a heap)
	const bool keep_pinned;

public:
	idx_t AppendToBlock(RowDataBlock &block, BufferHandle &handle, vector &append_entries, idx_t remaining,
	                    idx_t entry_sizes[]);
	RowDataBlock &CreateBlock();
	vector Build(idx_t added_count, data_ptr_t key_locations[], idx_t entry_sizes[],
	             const SelectionVector *sel = FlatVector::IncrementalSelectionVector());

	void Merge(RowDataCollection &other);

	//! Drops all blocks and pinned handles and resets the entry count to 0
	void Clear() {
		blocks.clear();
		pinned_blocks.clear();
		count = 0;
	}

	//! The size (in bytes) of this RowDataCollection
	idx_t SizeInBytes() const {
		VerifyBlockSizes();
		idx_t size = 0;
		for (auto &block : blocks) {
			size += block->block->GetMemoryUsage();
		}
		return size;
	}

	//! Verifies that the block sizes are correct (Debug only)
	//! NOTE(review): this checks against capacity * entry_size, whereas RowDataBlock's allocating constructor
	//! requests MaxValue(Storage::BLOCK_SIZE, capacity * entry_size); the two agree only if GetAllocSize rounds
	//! both to the same value - confirm
	void VerifyBlockSizes() const {
#ifdef DEBUG
		for (auto &block : blocks) {
			D_ASSERT(block->block->GetMemoryUsage() == BufferManager::GetAllocSize(block->capacity * entry_size));
		}
#endif
	}

	//! Number of entries of the given width that fit in Storage::BLOCK_SIZE (division; width must be non-zero)
	static inline idx_t EntriesPerBlock(idx_t width) {
		return Storage::BLOCK_SIZE / width;
	}

private:
	mutex rdc_lock;

	//! Copying is not allowed
	RowDataCollection(const RowDataCollection &) = delete;
};

} // namespace duckdb
//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/planner/bound_query_node.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! Bound equivalent of QueryNode
class BoundQueryNode {
public:
	explicit BoundQueryNode(QueryNodeType type) : type(type) {
	}
	virtual ~BoundQueryNode() {
	}

	//! The type of the query node, either SetOperation or Select
	QueryNodeType type;
	//! The result modifiers that should be applied to this query node
	vector> modifiers;

	//! The names returned by this QueryNode.
	vector names;
	//! The types returned by this QueryNode.
vector types; public: virtual idx_t GetRootIndex() = 0; public: template TARGET &Cast() { if (type != TARGET::TYPE) { throw InternalException("Failed to cast bound query node to type - query node type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (type != TARGET::TYPE) { throw InternalException("Failed to cast bound query node to type - query node type mismatch"); } return reinterpret_cast(*this); } }; } // namespace duckdb namespace duckdb { class RowLayout; struct LocalSortState; struct SortConstants { static constexpr idx_t VALUES_PER_RADIX = 256; static constexpr idx_t MSD_RADIX_LOCATIONS = VALUES_PER_RADIX + 1; static constexpr idx_t INSERTION_SORT_THRESHOLD = 24; static constexpr idx_t MSD_RADIX_SORT_SIZE_THRESHOLD = 4; }; struct SortLayout { public: SortLayout() { } explicit SortLayout(const vector &orders); SortLayout GetPrefixComparisonLayout(idx_t num_prefix_cols) const; public: idx_t column_count; vector order_types; vector order_by_null_types; vector logical_types; bool all_constant; vector constant_size; vector column_sizes; vector prefix_lengths; vector stats; vector has_null; idx_t comparison_size; idx_t entry_size; RowLayout blob_layout; unordered_map sorting_to_blob_col; }; struct GlobalSortState { public: GlobalSortState(BufferManager &buffer_manager, const vector &orders, RowLayout &payload_layout); //! Add local state sorted data to this global state void AddLocalState(LocalSortState &local_sort_state); //! Prepares the GlobalSortState for the merge sort phase (after completing radix sort phase) void PrepareMergePhase(); //! Initializes the global sort state for another round of merging void InitializeMergeRound(); //! Completes the cascaded merge sort round. //! Pass true if you wish to use the radix data for further comparisons. void CompleteMergeRound(bool keep_radix_data = false); //! Print the sorted data to the console. void Print(); public: //! 
The lock for updating the order global state mutex lock; //! The buffer manager BufferManager &buffer_manager; //! Sorting and payload layouts const SortLayout sort_layout; const RowLayout payload_layout; //! Sorted data vector> sorted_blocks; vector>> sorted_blocks_temp; unique_ptr odd_one_out; //! Pinned heap data (if sorting in memory) vector> heap_blocks; vector pinned_blocks; //! Capacity (number of rows) used to initialize blocks idx_t block_capacity; //! Whether we are doing an external sort bool external; //! Progress in merge path stage idx_t pair_idx; idx_t num_pairs; idx_t l_start; idx_t r_start; }; struct LocalSortState { public: LocalSortState(); //! Initialize the layouts and RowDataCollections void Initialize(GlobalSortState &global_sort_state, BufferManager &buffer_manager_p); //! Sink one DataChunk into the local sort state void SinkChunk(DataChunk &sort, DataChunk &payload); //! Size of accumulated data in bytes idx_t SizeInBytes() const; //! Sort the data accumulated so far void Sort(GlobalSortState &global_sort_state, bool reorder_heap); //! Concatenate the blocks held by a RowDataCollection into a single block static unique_ptr ConcatenateBlocks(RowDataCollection &row_data); private: //! Sorts the data in the newly created SortedBlock void SortInMemory(); //! Re-order the local state after sorting void ReOrder(GlobalSortState &gstate, bool reorder_heap); //! Re-order a SortedData object after sorting void ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataCollection &heap, GlobalSortState &gstate, bool reorder_heap); public: //! Whether this local state has been initialized bool initialized; //! The buffer manager BufferManager *buffer_manager; //! The sorting and payload layouts const SortLayout *sort_layout; const RowLayout *payload_layout; //! Radix/memcmp sortable data unique_ptr radix_sorting_data; //! Variable sized sorting data and accompanying heap unique_ptr blob_sorting_data; unique_ptr blob_sorting_heap; //! 
Payload data and accompanying heap unique_ptr payload_data; unique_ptr payload_heap; //! Sorted data vector> sorted_blocks; private: //! Selection vector and addresses for scattering the data to rows const SelectionVector &sel_ptr = *FlatVector::IncrementalSelectionVector(); Vector addresses = Vector(LogicalType::POINTER); }; struct MergeSorter { public: MergeSorter(GlobalSortState &state, BufferManager &buffer_manager); //! Finds and merges partitions until the current cascaded merge round is finished void PerformInMergeRound(); private: //! The global sorting state GlobalSortState &state; //! The sorting and payload layouts BufferManager &buffer_manager; const SortLayout &sort_layout; //! The left and right reader unique_ptr left; unique_ptr right; //! Input and output blocks unique_ptr left_input; unique_ptr right_input; SortedBlock *result; private: //! Computes the left and right block that will be merged next (Merge Path partition) void GetNextPartition(); //! Finds the boundary of the next partition using binary search void GetIntersection(const idx_t diagonal, idx_t &l_idx, idx_t &r_idx); //! Compare values within SortedBlocks using a global index int CompareUsingGlobalIndex(SBScanState &l, SBScanState &r, const idx_t l_idx, const idx_t r_idx); //! Finds the next partition and merges it void MergePartition(); //! Computes how the next 'count' tuples should be merged by setting the 'left_smaller' array void ComputeMerge(const idx_t &count, bool left_smaller[]); //! Merges the radix sorting blocks according to the 'left_smaller' array void MergeRadix(const idx_t &count, const bool left_smaller[]); //! Merges SortedData according to the 'left_smaller' array void MergeData(SortedData &result_data, SortedData &l_data, SortedData &r_data, const idx_t &count, const bool left_smaller[], idx_t next_entry_sizes[], bool reset_indices); //! 
Merges constant size rows according to the 'left_smaller' array void MergeRows(data_ptr_t &l_ptr, idx_t &l_entry_idx, const idx_t &l_count, data_ptr_t &r_ptr, idx_t &r_entry_idx, const idx_t &r_count, RowDataBlock &target_block, data_ptr_t &target_ptr, const idx_t &entry_size, const bool left_smaller[], idx_t &copied, const idx_t &count); //! Flushes constant size rows into the result void FlushRows(data_ptr_t &source_ptr, idx_t &source_entry_idx, const idx_t &source_count, RowDataBlock &target_block, data_ptr_t &target_ptr, const idx_t &entry_size, idx_t &copied, const idx_t &count); //! Flushes blob rows and accompanying heap void FlushBlobs(const RowLayout &layout, const idx_t &source_count, data_ptr_t &source_data_ptr, idx_t &source_entry_idx, data_ptr_t &source_heap_ptr, RowDataBlock &target_data_block, data_ptr_t &target_data_ptr, RowDataBlock &target_heap_block, BufferHandle &target_heap_handle, data_ptr_t &target_heap_ptr, idx_t &copied, const idx_t &count); }; } // namespace duckdb namespace duckdb { class Index; class ConflictInfo { public: ConflictInfo(const unordered_set &column_ids, bool only_check_unique = true) : column_ids(column_ids), only_check_unique(only_check_unique) { } const unordered_set &column_ids; public: bool ConflictTargetMatches(Index &index) const; void VerifyAllConflictsMeetCondition() const; public: bool only_check_unique = true; }; } // namespace duckdb namespace duckdb { class ClientContext; class TableIOManager; class Transaction; class ConflictManager; struct IndexLock; struct IndexScanState; //! The index is an abstract base class that serves as the basis for indexes class Index { public: Index(AttachedDatabase &db, IndexType type, TableIOManager &table_io_manager, const vector &column_ids, const vector> &unbound_expressions, IndexConstraintType constraint_type); virtual ~Index() = default; //! The type of the index IndexType type; //! Associated table io manager TableIOManager &table_io_manager; //! 
Column identifiers to extract key columns from the base table vector column_ids; //! Unordered set of column_ids used by the index unordered_set column_id_set; //! Unbound expressions used by the index during optimizations vector> unbound_expressions; //! The physical types stored in the index vector types; //! The logical types of the expressions vector logical_types; //! Index constraint type (primary key, foreign key, ...) IndexConstraintType constraint_type; //! Attached database instance AttachedDatabase &db; //! Buffer manager of the database instance BufferManager &buffer_manager; public: //! Initialize a single predicate scan on the index with the given expression and column IDs virtual unique_ptr InitializeScanSinglePredicate(const Transaction &transaction, const Value &value, const ExpressionType expression_type) = 0; //! Initialize a two predicate scan on the index with the given expression and column IDs virtual unique_ptr InitializeScanTwoPredicates(const Transaction &transaction, const Value &low_value, const ExpressionType low_expression_type, const Value &high_value, const ExpressionType high_expression_type) = 0; //! Performs a lookup on the index, fetching up to max_count result IDs. Returns true if all row IDs were fetched, //! and false otherwise virtual bool Scan(const Transaction &transaction, const DataTable &table, IndexScanState &state, const idx_t max_count, vector &result_ids) = 0; //! Obtain a lock on the index virtual void InitializeLock(IndexLock &state); //! Called when data is appended to the index. The lock obtained from InitializeLock must be held virtual PreservedError Append(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0; //! Obtains a lock and calls Append while holding that lock PreservedError Append(DataChunk &entries, Vector &row_identifiers); //! Verify that data can be appended to the index without a constraint violation virtual void VerifyAppend(DataChunk &chunk) = 0; //! 
Verify that data can be appended to the index without a constraint violation using the conflict manager virtual void VerifyAppend(DataChunk &chunk, ConflictManager &conflict_manager) = 0; //! Performs constraint checking for a chunk of input data virtual void CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) = 0; //! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held virtual void Delete(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0; //! Obtains a lock and calls Delete while holding that lock void Delete(DataChunk &entries, Vector &row_identifiers); //! Insert a chunk of entries into the index virtual PreservedError Insert(IndexLock &lock, DataChunk &input, Vector &row_identifiers) = 0; //! Merge another index into this index. The lock obtained from InitializeLock must be held, and the other //! index must also be locked during the merge virtual bool MergeIndexes(IndexLock &state, Index &other_index) = 0; //! Obtains a lock and calls MergeIndexes while holding that lock bool MergeIndexes(Index &other_index); //! Traverses an ART and vacuums the qualifying nodes. The lock obtained from InitializeLock must be held virtual void Vacuum(IndexLock &state) = 0; //! Obtains a lock and calls Vacuum while holding that lock void Vacuum(); //! Returns the string representation of an index, or only traverses and verifies the index virtual string VerifyAndToString(IndexLock &state, const bool only_verify) = 0; //! Obtains a lock and calls VerifyAndToString while holding that lock string VerifyAndToString(const bool only_verify); //! Returns true if the index is affected by updates on the specified column IDs, and false otherwise bool IndexIsUpdated(const vector &column_ids) const; //! Returns unique flag bool IsUnique() { return (constraint_type == IndexConstraintType::UNIQUE || constraint_type == IndexConstraintType::PRIMARY); } //! 
Returns primary key flag bool IsPrimary() { return (constraint_type == IndexConstraintType::PRIMARY); } //! Returns foreign key flag bool IsForeign() { return (constraint_type == IndexConstraintType::FOREIGN); } //! Serializes the index and returns the pair of block_id offset positions virtual BlockPointer Serialize(MetaBlockWriter &writer); //! Returns the serialized data pointer to the block and offset of the serialized index BlockPointer GetSerializedDataPointer() const { return serialized_data_pointer; } //! Execute the index expressions on an input chunk void ExecuteExpressions(DataChunk &input, DataChunk &result); static string AppendRowError(DataChunk &input, idx_t index); protected: //! Lock used for any changes to the index mutex lock; //! Pointer to serialized index data BlockPointer serialized_data_pointer; private: //! Bound expressions used during expression execution vector> bound_expressions; //! Expression executor to execute the index expressions ExpressionExecutor executor; //! Bind the unbound expressions of the index unique_ptr BindExpression(unique_ptr expr); public: template TARGET &Cast() { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } template const TARGET &Cast() const { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } }; } // namespace duckdb namespace duckdb { class ConflictManager; class TableIndexList { public: //! Scan the catalog set, invoking the callback method for every entry template void Scan(T &&callback) { // lock the catalog set lock_guard lock(indexes_lock); for (auto &index : indexes) { if (callback(*index)) { break; } } } const vector> &Indexes() const { return indexes; } void AddIndex(unique_ptr index); void RemoveIndex(Index &index); bool Empty(); idx_t Count(); void Move(TableIndexList &other); Index *FindForeignKeyIndex(const vector &fk_keys, ForeignKeyType fk_type); void VerifyForeignKey(const vector &fk_keys, DataChunk &chunk, ConflictManager &conflict_manager); //! 
Serialize all indexes owned by this table, returns a vector of block info of all indexes vector SerializeIndexes(duckdb::MetaBlockWriter &writer); vector GetRequiredColumns(); private: //! Indexes associated with the current table mutex indexes_lock; vector> indexes; }; } // namespace duckdb namespace duckdb { class CatalogEntry; struct BoundCreateTableInfo { explicit BoundCreateTableInfo(SchemaCatalogEntry &schema, unique_ptr base_p) : schema(schema), base(std::move(base_p)) { D_ASSERT(base); } //! The schema to create the table in SchemaCatalogEntry &schema; //! The base CreateInfo object unique_ptr base; //! Column dependency manager of the table ColumnDependencyManager column_dependency_manager; //! List of constraints on the table vector> constraints; //! List of bound constraints on the table vector> bound_constraints; //! Bound default values vector> bound_defaults; //! Dependents of the table (in e.g. default values) DependencyList dependencies; //! The existing table data on disk (if any) unique_ptr data; //! CREATE TABLE from QUERY unique_ptr query; //! Indexes created by this table vector indexes; //! Serializes a BoundCreateTableInfo to a stand-alone binary blob void Serialize(Serializer &serializer) const; //! 
Deserializes a blob back into a BoundCreateTableInfo static unique_ptr Deserialize(Deserializer &source, PlanDeserializationState &state); CreateTableInfo &Base() { D_ASSERT(base); return (CreateTableInfo &)*base; } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/default/default_types.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class SchemaCatalogEntry; class DefaultTypeGenerator : public DefaultGenerator { public: DefaultTypeGenerator(Catalog &catalog, SchemaCatalogEntry &schema); SchemaCatalogEntry &schema; public: DUCKDB_API static LogicalTypeId GetDefaultType(const string &name); unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; vector GetDefaultEntries() override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/extension_entries.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct ExtensionEntry { char name[48]; char extension[48]; }; static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {{"->>", "json"}, {"array_to_json", "json"}, {"create_fts_index", "fts"}, {"current_localtime", "icu"}, {"current_localtimestamp", "icu"}, {"dbgen", "tpch"}, {"drop_fts_index", "fts"}, {"dsdgen", "tpcds"}, {"excel_text", "excel"}, {"from_json", "json"}, {"from_json_strict", "json"}, {"from_substrait", "substrait"}, {"from_substrait_json", "substrait"}, {"get_substrait", "substrait"}, {"get_substrait_json", "substrait"}, {"icu_calendar_names", "icu"}, {"icu_sort_key", "icu"}, {"json", "json"}, {"json_array", "json"}, {"json_array_length", "json"}, {"json_contains", "json"}, {"json_extract", "json"}, {"json_extract_path", "json"}, {"json_extract_path_text", "json"}, {"json_extract_string", "json"}, {"json_group_array", "json"}, 
{"json_group_object", "json"}, {"json_group_structure", "json"}, {"json_keys", "json"}, {"json_merge_patch", "json"}, {"json_object", "json"}, {"json_quote", "json"}, {"json_structure", "json"}, {"json_transform", "json"}, {"json_transform_strict", "json"}, {"json_type", "json"}, {"json_valid", "json"}, {"json_serialize_sql", "json"}, {"json_deserialize_sql", "json"}, {"json_serialize_sql", "json"}, {"json_execute_serialized_sql", "json"}, {"make_timestamptz", "icu"}, {"parquet_metadata", "parquet"}, {"parquet_scan", "parquet"}, {"parquet_schema", "parquet"}, {"pg_timezone_names", "icu"}, {"postgres_attach", "postgres_scanner"}, {"postgres_scan", "postgres_scanner"}, {"postgres_scan_pushdown", "postgres_scanner"}, {"read_json", "json"}, {"read_json_auto", "json"}, {"read_json_objects", "json"}, {"read_json_objects_auto", "json"}, {"read_ndjson", "json"}, {"read_ndjson_auto", "json"}, {"read_ndjson_objects", "json"}, {"read_parquet", "parquet"}, {"row_to_json", "json"}, {"scan_arrow_ipc", "arrow"}, {"sqlite_attach", "sqlite_scanner"}, {"sqlite_scan", "sqlite_scanner"}, {"stem", "fts"}, {"text", "excel"}, {"to_arrow_ipc", "arrow"}, {"to_json", "json"}, {"tpcds", "tpcds"}, {"tpcds_answers", "tpcds"}, {"tpcds_queries", "tpcds"}, {"tpch", "tpch"}, {"tpch_answers", "tpch"}, {"tpch_queries", "tpch"}, {"visualize_diff_profiling_output", "visualizer"}, {"visualize_json_profiling_output", "visualizer"}, {"visualize_last_profiling_output", "visualizer"}, {"st_distance_spheroid", "spatial"}, {"st_boundary", "spatial"}, {"st_makeline", "spatial"}, {"st_buffer", "spatial"}, {"st_x", "spatial"}, {"st_isring", "spatial"}, {"st_centroid", "spatial"}, {"st_read", "spatial"}, {"st_geomfromwkb", "spatial"}, {"st_list_proj_crs", "spatial"}, {"st_isvalid", "spatial"}, {"st_polygon2dfromwkb", "spatial"}, {"st_disjoint", "spatial"}, {"st_length", "spatial"}, {"st_difference", "spatial"}, {"st_area", "spatial"}, {"st_union", "spatial"}, {"st_isclosed", "spatial"}, {"st_asgeojson", 
"spatial"}, {"st_intersection", "spatial"}, {"st_transform", "spatial"}, {"st_dwithin", "spatial"}, {"st_perimeter", "spatial"}, {"st_issimple", "spatial"}, {"st_geometrytype", "spatial"}, {"st_simplifypreservetopology", "spatial"}, {"st_distance", "spatial"}, {"st_astext", "spatial"}, {"st_overlaps", "spatial"}, {"st_convexhull", "spatial"}, {"st_normalize", "spatial"}, {"st_drivers", "spatial"}, {"st_point2dfromwkb", "spatial"}, {"st_point2d", "spatial"}, {"st_y", "spatial"}, {"st_dwithin_spheroid", "spatial"}, {"st_isempty", "spatial"}, {"st_simplify", "spatial"}, {"st_area_spheroid", "spatial"}, {"st_within", "spatial"}, {"st_length_spheroid", "spatial"}, {"st_point3d", "spatial"}, {"st_containsproperly", "spatial"}, {"st_contains", "spatial"}, {"st_collect", "spatial"}, {"st_touches", "spatial"}, {"st_linestring2dfromwkb", "spatial"}, {"st_flipcoordinates", "spatial"}, {"st_ashexwkb", "spatial"}, {"st_geomfromtext", "spatial"}, {"st_point4d", "spatial"}, {"st_point", "spatial"}, {"st_coveredby", "spatial"}, {"st_perimeter_spheroid", "spatial"}, {"st_intersects", "spatial"}, {"st_crosses", "spatial"}, {"st_covers", "spatial"}, {"st_envelope", "spatial"}, {"st_aswkb", "spatial"}, {"st_equals", "spatial"}, {"st_collectionextract", "spatial"}, {"st_npoints", "spatial"}, {"st_pointonsurface", "spatial"}, {"st_dimension", "spatial"}, {"st_removerepeatedpoints", "spatial"}, {"st_geomfromgeojson", "spatial"}, {"st_readosm", "spatial"}, {"st_numpoints", "spatial"}}; static constexpr ExtensionEntry EXTENSION_SETTINGS[] = { {"binary_as_string", "parquet"}, {"calendar", "icu"}, {"http_retries", "httpfs"}, {"http_retry_backoff", "httpfs"}, {"http_retry_wait_ms", "httpfs"}, {"http_timeout", "httpfs"}, {"force_download", "httpfs"}, {"s3_access_key_id", "httpfs"}, {"s3_endpoint", "httpfs"}, {"s3_region", "httpfs"}, {"s3_secret_access_key", "httpfs"}, {"s3_session_token", "httpfs"}, {"s3_uploader_max_filesize", "httpfs"}, {"s3_uploader_max_parts_per_file", "httpfs"}, 
{"s3_uploader_thread_limit", "httpfs"}, {"s3_url_compatibility_mode", "httpfs"}, {"s3_url_style", "httpfs"}, {"s3_use_ssl", "httpfs"}, {"sqlite_all_varchar", "sqlite_scanner"}, {"timezone", "icu"}, }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/attached_database.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Catalog; class DatabaseInstance; class StorageManager; class TransactionManager; class StorageExtension; struct AttachInfo; enum class AttachedDatabaseType { READ_WRITE_DATABASE, READ_ONLY_DATABASE, SYSTEM_DATABASE, TEMP_DATABASE, }; //! The AttachedDatabase represents an attached database instance class AttachedDatabase : public CatalogEntry { public: //! Create the built-in system attached database (without storage) explicit AttachedDatabase(DatabaseInstance &db, AttachedDatabaseType type = AttachedDatabaseType::SYSTEM_DATABASE); //! Create an attached database instance with the specified name and storage AttachedDatabase(DatabaseInstance &db, Catalog &catalog, string name, string file_path, AccessMode access_mode); //! 
Create an attached database instance with the specified storage extension AttachedDatabase(DatabaseInstance &db, Catalog &catalog, StorageExtension &ext, string name, AttachInfo &info, AccessMode access_mode); ~AttachedDatabase() override; void Initialize(); Catalog &ParentCatalog() override; StorageManager &GetStorageManager(); Catalog &GetCatalog(); TransactionManager &GetTransactionManager(); DatabaseInstance &GetDatabase() { return db; } const string &GetName() const { return name; } bool IsSystem() const; bool IsTemporary() const; bool IsReadOnly() const; bool IsInitialDatabase() const; void SetInitialDatabase(); static string ExtractDatabaseName(const string &dbpath); private: DatabaseInstance &db; unique_ptr storage; unique_ptr catalog; unique_ptr transaction_manager; AttachedDatabaseType type; optional_ptr parent_catalog; bool is_initial_database = false; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/database_manager.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class AttachedDatabase; class Catalog; class CatalogSet; class ClientContext; class DatabaseInstance; //! The DatabaseManager is a class that sits at the root of all attached databases class DatabaseManager { friend class Catalog; public: explicit DatabaseManager(DatabaseInstance &db); ~DatabaseManager(); public: static DatabaseManager &Get(DatabaseInstance &db); static DatabaseManager &Get(ClientContext &db); static DatabaseManager &Get(AttachedDatabase &db); void InitializeSystemCatalog(); //! Get an attached database with the given name optional_ptr GetDatabase(ClientContext &context, const string &name); //! Add a new attached database to the database manager void AddDatabase(ClientContext &context, unique_ptr db); void DetachDatabase(ClientContext &context, const string &name, OnEntryNotFound if_not_found); //! 
Returns a reference to the system catalog Catalog &GetSystemCatalog(); static const string &GetDefaultDatabase(ClientContext &context); void SetDefaultDatabase(ClientContext &context, const string &new_value); optional_ptr GetDatabaseFromPath(ClientContext &context, const string &path); vector> GetDatabases(ClientContext &context); transaction_t GetNewQueryNumber() { return current_query_number++; } transaction_t ActiveQueryNumber() const { return current_query_number; } idx_t ModifyCatalog() { return catalog_version++; } bool HasDefaultDatabase() { return !default_database.empty(); } private: //! The system database is a special database that holds system entries (e.g. functions) unique_ptr system; //! The set of attached databases unique_ptr databases; //! The global catalog version, incremented whenever anything changes in the catalog atomic catalog_version; //! The current query number atomic current_query_number; //! The current default database string default_database; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/built_in_functions.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class BuiltinFunctions { public: BuiltinFunctions(CatalogTransaction transaction, Catalog &catalog); ~BuiltinFunctions(); //! 
Initialize a catalog with all built-in functions void Initialize(); public: void AddFunction(AggregateFunctionSet set); void AddFunction(AggregateFunction function); void AddFunction(ScalarFunctionSet set); void AddFunction(PragmaFunction function); void AddFunction(const string &name, PragmaFunctionSet functions); void AddFunction(ScalarFunction function); void AddFunction(const vector &names, ScalarFunction function); void AddFunction(TableFunctionSet set); void AddFunction(TableFunction function); void AddFunction(CopyFunction function); void AddCollation(string name, ScalarFunction function, bool combinable = false, bool not_required_for_equality = false); private: CatalogTransaction transaction; Catalog &catalog; private: template void Register() { T::RegisterFunction(*this); } // table-producing functions void RegisterTableScanFunctions(); void RegisterSQLiteFunctions(); void RegisterReadFunctions(); void RegisterTableFunctions(); void RegisterArrowFunctions(); // aggregates void RegisterDistributiveAggregates(); // scalar functions void RegisterGenericFunctions(); void RegisterOperators(); void RegisterStringFunctions(); void RegisterNestedFunctions(); void RegisterSequenceFunctions(); // pragmas void RegisterPragmaFunctions(); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/queue.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::queue; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/duck_index_entry.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! An index catalog entry class DuckIndexEntry : public IndexCatalogEntry { public: //! 
Create an IndexCatalogEntry and initialize storage for it DuckIndexEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateIndexInfo &info); ~DuckIndexEntry(); shared_ptr info; public: string GetSchemaName() const override; string GetTableName() const override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/data_table.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table/column_segment.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/storage_lock.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class StorageLock; enum class StorageLockType { SHARED = 0, EXCLUSIVE = 1 }; class StorageLockKey { public: StorageLockKey(StorageLock &lock, StorageLockType type); ~StorageLockKey(); private: StorageLock &lock; StorageLockType type; }; class StorageLock { friend class StorageLockKey; public: StorageLock(); //! Get an exclusive lock unique_ptr GetExclusiveLock(); //! Get a shared lock unique_ptr GetSharedLock(); private: mutex exclusive_lock; atomic read_count; private: //! Release an exclusive lock void ReleaseExclusiveLock(); //! 
Release a shared lock void ReleaseSharedLock(); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/compression_function.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class DatabaseInstance; class ColumnData; class ColumnDataCheckpointer; class ColumnSegment; class SegmentStatistics; struct ColumnFetchState; struct ColumnScanState; struct SegmentScanState; struct AnalyzeState { virtual ~AnalyzeState() { } template TARGET &Cast() { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } template const TARGET &Cast() const { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } }; struct CompressionState { virtual ~CompressionState() { } template TARGET &Cast() { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } template const TARGET &Cast() const { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } }; struct CompressedSegmentState { virtual ~CompressedSegmentState() { } template TARGET &Cast() { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } template const TARGET &Cast() const { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } }; struct CompressionAppendState { CompressionAppendState(BufferHandle handle_p) : handle(std::move(handle_p)) { } virtual ~CompressionAppendState() { } BufferHandle handle; template TARGET &Cast() { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } template const TARGET &Cast() const { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } }; //===--------------------------------------------------------------------===// // Analyze //===--------------------------------------------------------------------===// //! The analyze functions are used to determine whether or not to use this compression method //! 
The system first determines the potential compression methods to use based on the physical type of the column //! After that the following steps are taken: //! 1. The init_analyze is called to initialize the analyze state of every candidate compression method //! 2. The analyze method is called with all of the input data in the order in which it must be stored. //! analyze can return "false". In that case, the compression method is taken out of consideration early. //! 3. The final_analyze method is called, which should return a score for the compression method //! The system then decides which compression function to use based on the analyzed score (returned from final_analyze) typedef unique_ptr (*compression_init_analyze_t)(ColumnData &col_data, PhysicalType type); typedef bool (*compression_analyze_t)(AnalyzeState &state, Vector &input, idx_t count); typedef idx_t (*compression_final_analyze_t)(AnalyzeState &state); //===--------------------------------------------------------------------===// // Compress //===--------------------------------------------------------------------===// typedef unique_ptr (*compression_init_compression_t)(ColumnDataCheckpointer &checkpointer, unique_ptr state); typedef void (*compression_compress_data_t)(CompressionState &state, Vector &scan_vector, idx_t count); typedef void (*compression_compress_finalize_t)(CompressionState &state); //===--------------------------------------------------------------------===// // Uncompress / Scan //===--------------------------------------------------------------------===// typedef unique_ptr (*compression_init_segment_scan_t)(ColumnSegment &segment); //! Function prototype used for reading an entire vector (STANDARD_VECTOR_SIZE) typedef void (*compression_scan_vector_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result); //! 
Function prototype used for reading an arbitrary ('scan_count') number of values typedef void (*compression_scan_partial_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset); //! Function prototype used for reading a single value typedef void (*compression_fetch_row_t)(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx); //! Function prototype used for skipping 'skip_count' values, non-trivial if random-access is not supported for the //! compressed data. typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count); //===--------------------------------------------------------------------===// // Append (optional) //===--------------------------------------------------------------------===// typedef unique_ptr (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id); typedef unique_ptr (*compression_init_append_t)(ColumnSegment &segment); typedef idx_t (*compression_append_t)(CompressionAppendState &append_state, ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count); typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats); typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row); class CompressionFunction { public: CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze, compression_analyze_t analyze, compression_final_analyze_t final_analyze, compression_init_compression_t init_compression, compression_compress_data_t compress, compression_compress_finalize_t compress_finalize, compression_init_segment_scan_t init_scan, compression_scan_vector_t scan_vector, compression_scan_partial_t scan_partial, compression_fetch_row_t fetch_row, compression_skip_t skip, compression_init_segment_t init_segment = nullptr, compression_init_append_t init_append 
= nullptr, compression_append_t append = nullptr, compression_finalize_append_t finalize_append = nullptr, compression_revert_append_t revert_append = nullptr) : type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze), init_compression(init_compression), compress(compress), compress_finalize(compress_finalize), init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip), init_segment(init_segment), init_append(init_append), append(append), finalize_append(finalize_append), revert_append(revert_append) { } //! Compression type CompressionType type; //! The data type this function can compress PhysicalType data_type; //! Analyze step: determine which compression function is the most effective //! init_analyze is called once to set up the analyze state compression_init_analyze_t init_analyze; //! analyze is called several times (once per vector in the row group) //! analyze should return true, unless compression is no longer possible with this compression method //! in that case false should be returned compression_analyze_t analyze; //! final_analyze should return the score of the compression function //! ideally this is the exact number of bytes required to store the data //! this is not required/enforced: it can be an estimate as well //! also this function can return DConstants::INVALID_INDEX to skip this compression method compression_final_analyze_t final_analyze; //! Compression step: actually compress the data //! init_compression is called once to set up the comperssion state compression_init_compression_t init_compression; //! compress is called several times (once per vector in the row group) compression_compress_data_t compress; //! compress_finalize is called after compression_compress_finalize_t compress_finalize; //! init_scan is called to set up the scan state compression_init_segment_scan_t init_scan; //! 
scan_vector scans an entire vector using the scan state compression_scan_vector_t scan_vector; //! scan_partial scans a subset of a vector //! this can request > vector_size as well //! this is used if a vector crosses segment boundaries, or for child columns of lists compression_scan_partial_t scan_partial; //! fetch an individual row from the compressed vector //! used for index lookups compression_fetch_row_t fetch_row; //! Skip forward in the compressed segment compression_skip_t skip; // Append functions //! This only really needs to be defined for uncompressed segments //! Initialize a compressed segment (optional) compression_init_segment_t init_segment; //! Initialize the append state (optional) compression_init_append_t init_append; //! Append to the compressed segment (optional) compression_append_t append; //! Finalize an append to the segment compression_finalize_append_t finalize_append; //! Revert append (optional) compression_revert_append_t revert_append; }; //! The set of compression functions struct CompressionFunctionSet { mutex lock; map> functions; }; } // namespace duckdb namespace duckdb { class ColumnSegment; class BlockManager; class ColumnSegment; class ColumnData; class DatabaseInstance; class Transaction; class BaseStatistics; class UpdateSegment; class TableFilter; struct ColumnFetchState; struct ColumnScanState; struct ColumnAppendState; enum class ColumnSegmentType : uint8_t { TRANSIENT, PERSISTENT }; //! TableFilter represents a filter pushed down into the table scan. class ColumnSegment : public SegmentBase { public: ~ColumnSegment(); //! The database instance DatabaseInstance &db; //! The type stored in the column LogicalType type; //! The size of the type idx_t type_size; //! The column segment type (transient or persistent) ColumnSegmentType segment_type; //! The compression function reference function; //! The statistics for the segment SegmentStatistics stats; //! 
The block that this segment relates to shared_ptr block; static unique_ptr CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager, block_id_t id, idx_t offset, const LogicalType &type_p, idx_t start, idx_t count, CompressionType compression_type, BaseStatistics statistics); static unique_ptr CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start, idx_t segment_size = Storage::BLOCK_SIZE); static unique_ptr CreateSegment(ColumnSegment &other, idx_t start); public: void InitializeScan(ColumnScanState &state); //! Scan one vector from this segment void Scan(ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset, bool entire_vector); //! Fetch a value of the specific row id and append it to the result void FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx); static idx_t FilterSelection(SelectionVector &sel, Vector &result, const TableFilter &filter, idx_t &approved_tuple_count, ValidityMask &mask); //! Skip a scan forward to the row_index specified in the scan state void Skip(ColumnScanState &state); // The maximum size of the buffer (in bytes) idx_t SegmentSize() const; //! Resize the block void Resize(idx_t segment_size); //! Initialize an append of this segment. Appends are only supported on transient segments. void InitializeAppend(ColumnAppendState &state); //! Appends a (part of) vector to the segment, returns the amount of entries successfully appended idx_t Append(ColumnAppendState &state, UnifiedVectorFormat &data, idx_t offset, idx_t count); //! Finalize the segment for appending - no more appends can follow on this segment //! The segment should be compacted as much as possible //! Returns the number of bytes occupied within the segment idx_t FinalizeAppend(ColumnAppendState &state); //! Revert an append made to this segment void RevertAppend(idx_t start_row); //! Convert a transient in-memory segment into a persistent segment blocked by an on-disk block. //! 
Only used during checkpointing. void ConvertToPersistent(optional_ptr block_manager, block_id_t block_id); //! Updates pointers to refer to the given block and offset. This is only used //! when sharing a block among segments. This is invoked only AFTER the block is written. void MarkAsPersistent(shared_ptr block, uint32_t offset_in_block); block_id_t GetBlockId() { D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT); return block_id; } BlockManager &GetBlockManager() const { return block->block_manager; } idx_t GetBlockOffset() { D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT || offset == 0); return offset; } idx_t GetRelativeIndex(idx_t row_index) { D_ASSERT(row_index >= this->start); D_ASSERT(row_index <= this->start + this->count); return row_index - this->start; } CompressedSegmentState *GetSegmentState() { return segment_state.get(); } public: ColumnSegment(DatabaseInstance &db, shared_ptr block, LogicalType type, ColumnSegmentType segment_type, idx_t start, idx_t count, CompressionFunction &function, BaseStatistics statistics, block_id_t block_id, idx_t offset, idx_t segment_size); ColumnSegment(ColumnSegment &other, idx_t start); private: void Scan(ColumnScanState &state, idx_t scan_count, Vector &result); void ScanPartial(ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset); private: //! The block id that this segment relates to (persistent segment only) block_id_t block_id; //! The offset into the block (persistent segment only) idx_t offset; //! The allocated segment size idx_t segment_size; //! 
Storage associated with the compressed segment unique_ptr segment_state; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table/row_group_collection.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table/segment_tree.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table/segment_lock.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct SegmentLock { public: SegmentLock() { } SegmentLock(mutex &lock) : lock(lock) { } // disable copy constructors SegmentLock(const SegmentLock &other) = delete; SegmentLock &operator=(const SegmentLock &) = delete; //! enable move constructors SegmentLock(SegmentLock &&other) noexcept { std::swap(lock, other.lock); } SegmentLock &operator=(SegmentLock &&other) noexcept { std::swap(lock, other.lock); return *this; } private: unique_lock lock; }; } // namespace duckdb namespace duckdb { template struct SegmentNode { idx_t row_start; unique_ptr node; }; //! The SegmentTree maintains a list of all segments of a specific column in a table, and allows searching for a segment //! by row number template class SegmentTree { private: class SegmentIterationHelper; public: explicit SegmentTree() : finished_loading(true) { } virtual ~SegmentTree() { } //! Locks the segment tree. All methods to the segment tree either lock the segment tree, or take an already //! obtained lock. SegmentLock Lock() { return SegmentLock(node_lock); } bool IsEmpty(SegmentLock &l) { return GetRootSegment(l) == nullptr; } //! Gets a pointer to the first segment. Useful for scans. 
T *GetRootSegment() { auto l = Lock(); return GetRootSegment(l); } T *GetRootSegment(SegmentLock &l) { if (nodes.empty()) { LoadNextSegment(l); } return GetRootSegmentInternal(); } //! Obtains ownership of the data of the segment tree vector> MoveSegments(SegmentLock &l) { LoadAllSegments(l); return std::move(nodes); } vector> MoveSegments() { auto l = Lock(); return MoveSegments(l); } idx_t GetSegmentCount() { auto l = Lock(); return nodes.size(); } //! Gets a pointer to the nth segment. Negative numbers start from the back. T *GetSegmentByIndex(int64_t index) { auto l = Lock(); return GetSegmentByIndex(l, index); } T *GetSegmentByIndex(SegmentLock &l, int64_t index) { if (index < 0) { // load all segments LoadAllSegments(l); index = nodes.size() + index; if (index < 0) { return nullptr; } return nodes[index].node.get(); } else { // lazily load segments until we reach the specific segment while (idx_t(index) >= nodes.size() && LoadNextSegment(l)) { } if (idx_t(index) >= nodes.size()) { return nullptr; } return nodes[index].node.get(); } } //! Gets the next segment T *GetNextSegment(T *segment) { if (!SUPPORTS_LAZY_LOADING) { return segment->Next(); } if (finished_loading) { return segment->Next(); } auto l = Lock(); return GetNextSegment(l, segment); } T *GetNextSegment(SegmentLock &l, T *segment) { if (!segment) { return nullptr; } #ifdef DEBUG D_ASSERT(nodes[segment->index].node.get() == segment); #endif return GetSegmentByIndex(l, segment->index + 1); } //! Gets a pointer to the last segment. Useful for appends. T *GetLastSegment(SegmentLock &l) { LoadAllSegments(l); if (nodes.empty()) { return nullptr; } return nodes.back().node.get(); } //! Gets a pointer to a specific column segment for the given row T *GetSegment(idx_t row_number) { auto l = Lock(); return GetSegment(l, row_number); } T *GetSegment(SegmentLock &l, idx_t row_number) { return nodes[GetSegmentIndex(l, row_number)].node.get(); } //! 
Append a column segment to the tree void AppendSegmentInternal(SegmentLock &l, unique_ptr segment) { D_ASSERT(segment); // add the node to the list of nodes if (!nodes.empty()) { nodes.back().node->next = segment.get(); } SegmentNode node; segment->index = nodes.size(); node.row_start = segment->start; node.node = std::move(segment); nodes.push_back(std::move(node)); } void AppendSegment(unique_ptr segment) { auto l = Lock(); AppendSegment(l, std::move(segment)); } void AppendSegment(SegmentLock &l, unique_ptr segment) { LoadAllSegments(l); AppendSegmentInternal(l, std::move(segment)); } //! Debug method, check whether the segment is in the segment tree bool HasSegment(T *segment) { auto l = Lock(); return HasSegment(l, segment); } bool HasSegment(SegmentLock &, T *segment) { return segment->index < nodes.size() && nodes[segment->index].node.get() == segment; } //! Replace this tree with another tree, taking over its nodes in-place void Replace(SegmentTree &other) { auto l = Lock(); Replace(l, other); } void Replace(SegmentLock &l, SegmentTree &other) { other.LoadAllSegments(l); nodes = std::move(other.nodes); } //! Erase all segments after a specific segment void EraseSegments(SegmentLock &l, idx_t segment_start) { LoadAllSegments(l); if (segment_start >= nodes.size() - 1) { return; } nodes.erase(nodes.begin() + segment_start + 1, nodes.end()); } //! 
Get the segment index of the column segment for the given row idx_t GetSegmentIndex(SegmentLock &l, idx_t row_number) { idx_t segment_index; if (TryGetSegmentIndex(l, row_number, segment_index)) { return segment_index; } string error; error = StringUtil::Format("Attempting to find row number \"%lld\" in %lld nodes\n", row_number, nodes.size()); for (idx_t i = 0; i < nodes.size(); i++) { error += StringUtil::Format("Node %lld: Start %lld, Count %lld", i, nodes[i].row_start, nodes[i].node->count.load()); } throw InternalException("Could not find node in column segment tree!\n%s%s", error, Exception::GetStackTrace()); } bool TryGetSegmentIndex(SegmentLock &l, idx_t row_number, idx_t &result) { // load segments until the row number is within bounds while (nodes.empty() || (row_number >= (nodes.back().row_start + nodes.back().node->count))) { if (!LoadNextSegment(l)) { break; } } if (nodes.empty()) { return false; } D_ASSERT(!nodes.empty()); D_ASSERT(row_number >= nodes[0].row_start); D_ASSERT(row_number < nodes.back().row_start + nodes.back().node->count); idx_t lower = 0; idx_t upper = nodes.size() - 1; // binary search to find the node while (lower <= upper) { idx_t index = (lower + upper) / 2; D_ASSERT(index < nodes.size()); auto &entry = nodes[index]; D_ASSERT(entry.row_start == entry.node->start); if (row_number < entry.row_start) { upper = index - 1; } else if (row_number >= entry.row_start + entry.node->count) { lower = index + 1; } else { result = index; return true; } } return false; } void Verify(SegmentLock &) { #ifdef DEBUG idx_t base_start = nodes.empty() ? 
0 : nodes[0].node->start; for (idx_t i = 0; i < nodes.size(); i++) { D_ASSERT(nodes[i].row_start == nodes[i].node->start); D_ASSERT(nodes[i].node->start == base_start); base_start += nodes[i].node->count; } #endif } void Verify() { #ifdef DEBUG auto l = Lock(); Verify(l); #endif } SegmentIterationHelper Segments() { return SegmentIterationHelper(*this); } void Reinitialize() { if (nodes.empty()) { return; } idx_t offset = nodes[0].node->start; for (auto &entry : nodes) { if (entry.node->start != offset) { throw InternalException("In SegmentTree::Reinitialize - gap found between nodes!"); } entry.row_start = offset; offset += entry.node->count; } } protected: atomic finished_loading; //! Load the next segment - only used when lazily loading virtual unique_ptr LoadSegment() { return nullptr; } private: //! The nodes in the tree, can be binary searched vector> nodes; //! Lock to access or modify the nodes mutex node_lock; private: T *GetRootSegmentInternal() { return nodes.empty() ? nullptr : nodes[0].node.get(); } class SegmentIterationHelper { public: explicit SegmentIterationHelper(SegmentTree &tree) : tree(tree) { } private: SegmentTree &tree; private: class SegmentIterator { public: SegmentIterator(SegmentTree &tree_p, T *current_p) : tree(tree_p), current(current_p) { } SegmentTree &tree; T *current; public: void Next() { current = tree.GetNextSegment(current); } SegmentIterator &operator++() { Next(); return *this; } bool operator!=(const SegmentIterator &other) const { return current != other.current; } T &operator*() const { D_ASSERT(current); return *current; } }; public: SegmentIterator begin() { return SegmentIterator(tree, tree.GetRootSegment()); } SegmentIterator end() { return SegmentIterator(tree, nullptr); } }; //! 
Load the next segment, if there are any left to load bool LoadNextSegment(SegmentLock &l) { if (!SUPPORTS_LAZY_LOADING) { return false; } if (finished_loading) { return false; } auto result = LoadSegment(); if (result) { AppendSegmentInternal(l, std::move(result)); return true; } return false; } //! Load all segments, if there are any left to load void LoadAllSegments(SegmentLock &l) { if (!SUPPORTS_LAZY_LOADING) { return; } while (LoadNextSegment(l)) ; } }; } // namespace duckdb namespace duckdb { struct ParallelTableScanState; struct ParallelCollectionScanState; class CreateIndexScanState; class CollectionScanState; class PersistentTableData; class TableDataWriter; class TableIndexList; class TableStatistics; struct TableAppendState; class DuckTransaction; class BoundConstraint; class RowGroupSegmentTree; struct ColumnSegmentInfo; class RowGroupCollection { public: RowGroupCollection(shared_ptr info, BlockManager &block_manager, vector types, idx_t row_start, idx_t total_rows = 0); public: idx_t GetTotalRows() const; Allocator &GetAllocator() const; void Initialize(PersistentTableData &data); void InitializeEmpty(); bool IsEmpty() const; void AppendRowGroup(SegmentLock &l, idx_t start_row); //! 
Get the nth row-group, negative numbers start from the back (so -1 is the last row group, etc) RowGroup *GetRowGroup(int64_t index); idx_t RowGroupCount(); void Verify(); void InitializeScan(CollectionScanState &state, const vector &column_ids, TableFilterSet *table_filters); void InitializeCreateIndexScan(CreateIndexScanState &state); void InitializeScanWithOffset(CollectionScanState &state, const vector &column_ids, idx_t start_row, idx_t end_row); static bool InitializeScanInRowGroup(CollectionScanState &state, RowGroupCollection &collection, RowGroup &row_group, idx_t vector_index, idx_t max_row); void InitializeParallelScan(ParallelCollectionScanState &state); bool NextParallelScan(ClientContext &context, ParallelCollectionScanState &state, CollectionScanState &scan_state); bool Scan(DuckTransaction &transaction, const vector &column_ids, const std::function &fun); bool Scan(DuckTransaction &transaction, const std::function &fun); void Fetch(TransactionData transaction, DataChunk &result, const vector &column_ids, const Vector &row_identifiers, idx_t fetch_count, ColumnFetchState &state); //! Initialize an append of a variable number of rows. FinalizeAppend must be called after appending is done. void InitializeAppend(TableAppendState &state); //! Initialize an append with a known number of rows. FinalizeAppend should not be called after appending is done. void InitializeAppend(TransactionData transaction, TableAppendState &state, idx_t append_count); //! Appends to the row group collection. Returns true if a new row group has been created to append to bool Append(DataChunk &chunk, TableAppendState &state); //! FinalizeAppend flushes an append with a variable number of rows. 
void FinalizeAppend(TransactionData transaction, TableAppendState &state); void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count); void RevertAppendInternal(idx_t start_row, idx_t count); void MergeStorage(RowGroupCollection &data); void RemoveFromIndexes(TableIndexList &indexes, Vector &row_identifiers, idx_t count); idx_t Delete(TransactionData transaction, DataTable &table, row_t *ids, idx_t count); void Update(TransactionData transaction, row_t *ids, const vector &column_ids, DataChunk &updates); void UpdateColumn(TransactionData transaction, Vector &row_ids, const vector &column_path, DataChunk &updates); void Checkpoint(TableDataWriter &writer, TableStatistics &global_stats); void CommitDropColumn(idx_t index); void CommitDropTable(); vector GetColumnSegmentInfo(); const vector &GetTypes() const; shared_ptr AddColumn(ClientContext &context, ColumnDefinition &new_column, Expression *default_value); shared_ptr RemoveColumn(idx_t col_idx); shared_ptr AlterType(ClientContext &context, idx_t changed_idx, const LogicalType &target_type, vector bound_columns, Expression &cast_expr); void VerifyNewConstraint(DataTable &parent, const BoundConstraint &constraint); void CopyStats(TableStatistics &stats); unique_ptr CopyStats(column_t column_id); void SetDistinct(column_t column_id, unique_ptr distinct_stats); AttachedDatabase &GetAttached(); DatabaseInstance &GetDatabase(); BlockManager &GetBlockManager() { return block_manager; } DataTableInfo &GetTableInfo() { return *info; } private: bool IsEmpty(SegmentLock &) const; private: //! BlockManager BlockManager &block_manager; //! The number of rows in the table atomic total_rows; //! The data table info shared_ptr info; //! The column types of the row group collection vector types; idx_t row_start; //! The segment trees holding the various row_groups of the table shared_ptr row_groups; //! 
Table statistics TableStatistics stats; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/transaction/local_storage.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/optimistic_data_writer.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class PartialBlockManager; class OptimisticDataWriter { public: OptimisticDataWriter(DataTable &table); OptimisticDataWriter(DataTable &table, OptimisticDataWriter &parent); ~OptimisticDataWriter(); //! Write a new row group to disk (if possible) void WriteNewRowGroup(RowGroupCollection &row_groups); //! Write the last row group of a collection to disk void WriteLastRowGroup(RowGroupCollection &row_groups); //! Final flush of the optimistic writer - fully flushes the partial block manager void FinalFlush(); //! Flushes a specific row group to disk void FlushToDisk(RowGroup *row_group); //! Merge the partially written blocks from one optimistic writer into another void Merge(OptimisticDataWriter &other); //! Rollback void Rollback(); private: //! Prepare a write to disk bool PrepareWrite(); private: //! The table DataTable &table; //! 
The partial block manager (if we created one yet) unique_ptr partial_manager; }; } // namespace duckdb namespace duckdb { class AttachedDatabase; class DataTable; class Transaction; class WriteAheadLog; struct LocalAppendState; struct TableAppendState; class LocalTableStorage : public std::enable_shared_from_this { public: // Create a new LocalTableStorage explicit LocalTableStorage(DataTable &table); // Create a LocalTableStorage from an ALTER TYPE LocalTableStorage(ClientContext &context, DataTable &table, LocalTableStorage &parent, idx_t changed_idx, const LogicalType &target_type, const vector &bound_columns, Expression &cast_expr); // Create a LocalTableStorage from a DROP COLUMN LocalTableStorage(DataTable &table, LocalTableStorage &parent, idx_t drop_idx); // Create a LocalTableStorage from an ADD COLUMN LocalTableStorage(ClientContext &context, DataTable &table, LocalTableStorage &parent, ColumnDefinition &new_column, optional_ptr default_value); ~LocalTableStorage(); reference table_ref; Allocator &allocator; //! The main chunk collection holding the data shared_ptr row_groups; //! The set of unique indexes TableIndexList indexes; //! The number of deleted rows idx_t deleted_rows; //! The main optimistic data writer OptimisticDataWriter optimistic_writer; //! The set of all optimistic data writers associated with this table vector> optimistic_writers; //! Whether or not storage was merged bool merged_storage = false; public: void InitializeScan(CollectionScanState &state, optional_ptr table_filters = nullptr); //! Write a new row group to disk (if possible) void WriteNewRowGroup(); void FlushBlocks(); void Rollback(); idx_t EstimatedSize(); void AppendToIndexes(DuckTransaction &transaction, TableAppendState &append_state, idx_t append_count, bool append_to_table); PreservedError AppendToIndexes(DuckTransaction &transaction, RowGroupCollection &source, TableIndexList &index_list, const vector &table_types, row_t &start_row); //! 
Creates an optimistic writer for this table OptimisticDataWriter &CreateOptimisticWriter(); void FinalizeOptimisticWriter(OptimisticDataWriter &writer); }; class LocalTableManager { public: shared_ptr MoveEntry(DataTable &table); reference_map_t> MoveEntries(); optional_ptr GetStorage(DataTable &table); LocalTableStorage &GetOrCreateStorage(DataTable &table); idx_t EstimatedSize(); bool IsEmpty(); void InsertEntry(DataTable &table, shared_ptr entry); private: mutex table_storage_lock; reference_map_t> table_storage; }; //! The LocalStorage class holds appends that have not been committed yet class LocalStorage { public: // Threshold to merge row groups instead of appending static constexpr const idx_t MERGE_THRESHOLD = RowGroup::ROW_GROUP_SIZE; public: struct CommitState { CommitState(); ~CommitState(); reference_map_t> append_states; }; public: explicit LocalStorage(ClientContext &context, DuckTransaction &transaction); static LocalStorage &Get(DuckTransaction &transaction); static LocalStorage &Get(ClientContext &context, AttachedDatabase &db); static LocalStorage &Get(ClientContext &context, Catalog &catalog); //! Initialize a scan of the local storage void InitializeScan(DataTable &table, CollectionScanState &state, optional_ptr table_filters); //! Scan void Scan(CollectionScanState &state, const vector &column_ids, DataChunk &result); void InitializeParallelScan(DataTable &table, ParallelCollectionScanState &state); bool NextParallelScan(ClientContext &context, DataTable &table, ParallelCollectionScanState &state, CollectionScanState &scan_state); //! Begin appending to the local storage void InitializeAppend(LocalAppendState &state, DataTable &table); //! Append a chunk to the local storage static void Append(LocalAppendState &state, DataChunk &chunk); //! Finish appending to the local storage static void FinalizeAppend(LocalAppendState &state); //! 
Merge a row group collection into the transaction-local storage void LocalMerge(DataTable &table, RowGroupCollection &collection); //! Create an optimistic writer for the specified table OptimisticDataWriter &CreateOptimisticWriter(DataTable &table); void FinalizeOptimisticWriter(DataTable &table, OptimisticDataWriter &writer); //! Delete a set of rows from the local storage idx_t Delete(DataTable &table, Vector &row_ids, idx_t count); //! Update a set of rows in the local storage void Update(DataTable &table, Vector &row_ids, const vector &column_ids, DataChunk &data); //! Commits the local storage, writing it to the WAL and completing the commit void Commit(LocalStorage::CommitState &commit_state, DuckTransaction &transaction); //! Rollback the local storage void Rollback(); bool ChangesMade() noexcept; idx_t EstimatedSize(); bool Find(DataTable &table); idx_t AddedRows(DataTable &table); void AddColumn(DataTable &old_dt, DataTable &new_dt, ColumnDefinition &new_column, optional_ptr default_value); void DropColumn(DataTable &old_dt, DataTable &new_dt, idx_t removed_column); void ChangeType(DataTable &old_dt, DataTable &new_dt, idx_t changed_idx, const LogicalType &target_type, const vector &bound_columns, Expression &cast_expr); void MoveStorage(DataTable &old_dt, DataTable &new_dt); void FetchChunk(DataTable &table, Vector &row_ids, idx_t count, const vector &col_ids, DataChunk &chunk, ColumnFetchState &fetch_state); TableIndexList &GetIndexes(DataTable &table); void VerifyNewConstraint(DataTable &parent, const BoundConstraint &constraint); private: ClientContext &context; DuckTransaction &transaction; LocalTableManager table_manager; void Flush(DataTable &table, LocalTableStorage &storage); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table/data_table_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class 
DatabaseInstance;
class TableIOManager;

//! Bundles the per-table state declared below: owning database, IO manager, committed row count,
//! schema/table names and the table's index list.
// NOTE(review): template arguments (e.g. on shared_ptr / atomic / vector) are missing in this copy of the header.
struct DataTableInfo {
	DataTableInfo(AttachedDatabase &db, shared_ptr table_io_manager_p, string schema, string table);

	//! The attached database that the table belongs to
	AttachedDatabase &db;
	//! The table IO manager
	shared_ptr table_io_manager;
	//! The amount of elements in the table. Note that this number signifies the amount of COMMITTED entries in the
	//! table. It can be inaccurate inside of transactions. More work is needed to properly support that.
	atomic cardinality;
	// schema of the table
	string schema;
	// name of the table
	string table;

	//! The indexes of the table
	TableIndexList indexes;

	bool IsTemporary() const;
};

} // namespace duckdb

namespace duckdb {
// Forward declarations used by the DataTable interface below
class BoundForeignKeyConstraint;
class ClientContext;
class ColumnDataCollection;
class ColumnDefinition;
class DataTable;
class DuckTransaction;
class OptimisticDataWriter;
class RowGroup;
class StorageManager;
class TableCatalogEntry;
class TableIOManager;
class Transaction;
class WriteAheadLog;
class TableDataWriter;
class ConflictManager;
class TableScanState;
enum class VerifyExistenceType : uint8_t;

//! DataTable represents a physical table on disk
class DataTable {
public:
	//! Constructs a new data table from an (optional) set of persistent segments
	DataTable(AttachedDatabase &db, shared_ptr table_io_manager, const string &schema, const string &table, vector column_definitions_p, unique_ptr data = nullptr);
	//! Constructs a DataTable as a delta on an existing data table with a newly added column
	DataTable(ClientContext &context, DataTable &parent, ColumnDefinition &new_column, Expression *default_value);
	//! Constructs a DataTable as a delta on an existing data table but with one column removed
	DataTable(ClientContext &context, DataTable &parent, idx_t removed_column);
	//!
//! Constructs a DataTable as a delta on an existing data table but with one column changed type
	DataTable(ClientContext &context, DataTable &parent, idx_t changed_idx, const LogicalType &target_type, const vector &bound_columns, Expression &cast_expr);
	//! Constructs a DataTable as a delta on an existing data table but with one newly added constraint
	explicit DataTable(ClientContext &context, DataTable &parent, unique_ptr constraint);

	//! The table info
	shared_ptr info;
	//! The set of physical columns stored by this DataTable
	vector column_definitions;
	//! A reference to the attached database
	AttachedDatabase &db;

public:
	//! Returns a list of types of the table
	vector GetTypes();

	//! Initializes a scan over the given column ids, with an optional set of table filters
	void InitializeScan(TableScanState &state, const vector &column_ids, TableFilterSet *table_filter = nullptr);
	//! Overload of InitializeScan that additionally takes the scanning transaction
	void InitializeScan(DuckTransaction &transaction, TableScanState &state, const vector &column_ids, TableFilterSet *table_filters = nullptr);

	//! Returns the maximum amount of threads that should be assigned to scan this data table
	idx_t MaxThreads(ClientContext &context);
	void InitializeParallelScan(ClientContext &context, ParallelTableScanState &state);
	bool NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state);

	//! Scans up to STANDARD_VECTOR_SIZE elements from the table starting
	//! from offset and store them in result. Offset is incremented with how many
	//! elements were returned.
	//! Returns true if all pushed down filters were executed during data fetching
	//! NOTE(review): the declared return type is void, so the "Returns true ..." sentence above looks stale.
	void Scan(DuckTransaction &transaction, DataChunk &result, TableScanState &state);

	//! Fetch data from the specific row identifiers from the base table
	void Fetch(DuckTransaction &transaction, DataChunk &result, const vector &column_ids, const Vector &row_ids, idx_t fetch_count, ColumnFetchState &state);

	//! Initializes an append to transaction-local storage
	void InitializeLocalAppend(LocalAppendState &state, ClientContext &context);
	//!
//! Append a DataChunk to the transaction-local storage of the table.
	//! NOTE(review): the effect of the `unsafe` flag is not visible in this declaration - see the definition.
	void LocalAppend(LocalAppendState &state, TableCatalogEntry &table, ClientContext &context, DataChunk &chunk, bool unsafe = false);
	//! Finalizes a transaction-local append
	void FinalizeLocalAppend(LocalAppendState &state);
	//! Append a chunk to the transaction-local storage of this table
	void LocalAppend(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk);
	//! Append a column data collection to the transaction-local storage of this table
	void LocalAppend(TableCatalogEntry &table, ClientContext &context, ColumnDataCollection &collection);
	//! Merge a row group collection into the transaction-local storage
	void LocalMerge(ClientContext &context, RowGroupCollection &collection);
	//! Creates an optimistic writer for this table - used for optimistically writing parallel appends
	OptimisticDataWriter &CreateOptimisticWriter(ClientContext &context);
	//! Finalizes a writer obtained from CreateOptimisticWriter (presumably - confirm against the definition)
	void FinalizeOptimisticWriter(ClientContext &context, OptimisticDataWriter &writer);

	//! Delete the entries with the specified row identifier from the table
	idx_t Delete(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, idx_t count);
	//! Update the entries with the specified row identifier from the table
	void Update(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, const vector &column_ids, DataChunk &data);
	//! Update a single (sub-)column along a column path
	//! The column_path vector is a *path* towards a column within the table
	//! i.e. if we have a table with a single column S STRUCT(A INT, B INT)
	//! and we update the validity mask of "S.B"
	//! the column path is:
	//! 0 (first column of table)
	//! -> 1 (second subcolumn of struct)
	//! -> 0 (first subcolumn of INT)
	//! This method should only be used from the WAL replay. It does not verify update constraints.
	void UpdateColumn(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, const vector &column_path, DataChunk &updates);

	//!
//! Add an index to the DataTable. NOTE: for CREATE (UNIQUE) INDEX statements, we use the PhysicalCreateIndex
	//! operator. This function is only used during the WAL replay, and is a much less performant index creation
	//! approach.
	// NOTE(review): `vector>` below is a remnant of stripped template arguments in this copy of the header.
	void WALAddIndex(ClientContext &context, unique_ptr index, const vector> &expressions);

	//! Fetches an append lock
	void AppendLock(TableAppendState &state);
	//! Begin appending structs to this table, obtaining necessary locks, etc
	void InitializeAppend(DuckTransaction &transaction, TableAppendState &state, idx_t append_count);
	//! Append a chunk to the table using the AppendState obtained from InitializeAppend
	void Append(DataChunk &chunk, TableAppendState &state);
	//! Commit the append
	void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count);
	//! Write a segment of the table to the WAL
	void WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count);
	//! Revert a set of appends made by the given AppendState, used to revert appends in the event of an error during
	//! commit (e.g. because of an I/O exception)
	void RevertAppend(idx_t start_row, idx_t count);
	void RevertAppendInternal(idx_t start_row, idx_t count);

	//! Invokes `function` while scanning the rows in [start_row, start_row + count)
	//! (presumably once per scanned chunk - confirm against the definition)
	void ScanTableSegment(idx_t start_row, idx_t count, const std::function &function);

	//! Merge a row group collection directly into this table - appending it to the end of the table without copying
	void MergeStorage(RowGroupCollection &data, TableIndexList &indexes);

	//! Append a chunk with the row ids [row_start, ..., row_start + chunk.size()] to all indexes of the table, returns
	//! whether or not the append succeeded
	PreservedError AppendToIndexes(DataChunk &chunk, row_t row_start);
	//! Static variant of AppendToIndexes that operates on an explicitly provided index list
	static PreservedError AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t row_start);
	//! Remove a chunk with the row ids [row_start, ..., row_start + chunk.size()] from all indexes of the table
	void RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, row_t row_start);
	//!
//! Remove the chunk with the specified set of row identifiers from all indexes of the table
	void RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, Vector &row_identifiers);
	//! Remove the row identifiers from all the indexes of the table
	void RemoveFromIndexes(Vector &row_identifiers, idx_t count);

	//! Marks this DataTable as the root (sets is_root to true)
	void SetAsRoot() {
		this->is_root = true;
	}
	//! Returns the current value of is_root
	bool IsRoot() {
		return this->is_root;
	}

	//! Get statistics of a physical column within the table
	unique_ptr GetStatistics(ClientContext &context, column_t column_id);
	//! Sets the distinct statistics of a physical column within the table
	void SetDistinct(column_t column_id, unique_ptr distinct_stats);

	//! Checkpoint the table to the specified table data writer
	void Checkpoint(TableDataWriter &writer);
	void CommitDropTable();
	void CommitDropColumn(idx_t index);

	idx_t GetTotalRows();

	vector GetColumnSegmentInfo();
	static bool IsForeignKeyIndex(const vector &fk_keys, Index &index, ForeignKeyType fk_type);

	//! Initializes a special scan that is used to create an index on the table, it keeps locks on the table
	void InitializeWALCreateIndexScan(CreateIndexScanState &state, const vector &column_ids);
	//! Scans the next chunk for the CREATE INDEX operator
	bool CreateIndexScan(TableScanState &state, DataChunk &result, TableScanType type);

	//! Verify constraints with a chunk from the Append containing all columns of the table
	void VerifyAppendConstraints(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk, ConflictManager *conflict_manager = nullptr);

public:
	//! Static helper that verifies `chunk` against the provided index list; conflict_manager is passed as a pointer
	//! (whether nullptr is accepted is not visible here - see the definition)
	static void VerifyUniqueIndexes(TableIndexList &indexes, ClientContext &context, DataChunk &chunk, ConflictManager *conflict_manager);

private:
	//! Verify the newly added constraints against the current persistent and local data
	void VerifyNewConstraint(ClientContext &context, DataTable &parent, const BoundConstraint *constraint);
	//!
//! Verify constraints with a chunk from the Update containing only the specified column_ids
	void VerifyUpdateConstraints(ClientContext &context, TableCatalogEntry &table, DataChunk &chunk, const vector &column_ids);
	//! Verify constraints with a chunk from the Delete containing all columns of the table
	void VerifyDeleteConstraints(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk);

	void InitializeScanWithOffset(TableScanState &state, const vector &column_ids, idx_t start_row, idx_t end_row);

	//! Foreign key verification; the append/delete variants below take no explicit VerifyExistenceType
	void VerifyForeignKeyConstraint(const BoundForeignKeyConstraint &bfk, ClientContext &context, DataChunk &chunk, VerifyExistenceType verify_type);
	void VerifyAppendForeignKeyConstraint(const BoundForeignKeyConstraint &bfk, ClientContext &context, DataChunk &chunk);
	void VerifyDeleteForeignKeyConstraint(const BoundForeignKeyConstraint &bfk, ClientContext &context, DataChunk &chunk);

private:
	//! Lock for appending entries to the table
	mutex append_lock;
	//! The row groups of the table
	shared_ptr row_groups;
	//! Whether or not the data table is the root DataTable for this table; the root DataTable is the newest version
	//! that can be appended to
	atomic is_root;
};
} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/execution/index/art/art.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

// classes
//! The kind of operation for which existence is verified
enum class VerifyExistenceType : uint8_t {
	APPEND = 0,    // appends to a table
	APPEND_FK = 1, // appends to a table that has a foreign key
	DELETE_FK = 2  // delete from a table that has a foreign key
};
class ConflictManager;
class Node;
class ARTKey;
class FixedSizeAllocator;

// structs
struct ARTIndexScanState;
//! Flags passed to the ART merge/vacuum helpers (InitializeMerge, InitializeVacuum, FinalizeVacuum)
struct ARTFlags {
	vector vacuum_flags;
	vector merge_buffer_counts;
};

class ART : public Index {
public:
	//!
//! Constructs an ART
	//! block_id and block_offset default to DConstants::INVALID_INDEX
	// NOTE(review): `vector>` below is a remnant of stripped template arguments in this copy of the header.
	ART(const vector &column_ids, TableIOManager &table_io_manager, const vector> &unbound_expressions, const IndexConstraintType constraint_type, AttachedDatabase &db, const idx_t block_id = DConstants::INVALID_INDEX, const idx_t block_offset = DConstants::INVALID_INDEX);
	~ART() override;

	//! Root of the tree
	unique_ptr tree;
	//! Fixed-size allocators holding the ART nodes
	vector> allocators;

public:
	//! Initialize a single predicate scan on the index with the given expression and column IDs
	unique_ptr InitializeScanSinglePredicate(const Transaction &transaction, const Value &value, const ExpressionType expression_type) override;
	//! Initialize a two predicate scan on the index with the given expression and column IDs
	unique_ptr InitializeScanTwoPredicates(const Transaction &transaction, const Value &low_value, const ExpressionType low_expression_type, const Value &high_value, const ExpressionType high_expression_type) override;
	//! Performs a lookup on the index, fetching up to max_count result IDs. Returns true if all row IDs were fetched,
	//! and false otherwise
	bool Scan(const Transaction &transaction, const DataTable &table, IndexScanState &state, const idx_t max_count, vector &result_ids) override;

	//! Called when data is appended to the index. The lock obtained from InitializeLock must be held
	PreservedError Append(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
	//! Verify that data can be appended to the index without a constraint violation
	void VerifyAppend(DataChunk &chunk) override;
	//! Verify that data can be appended to the index without a constraint violation using the conflict manager
	void VerifyAppend(DataChunk &chunk, ConflictManager &conflict_manager) override;
	//! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held
	void Delete(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
	//!
//! Insert a chunk of entries into the index
	PreservedError Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) override;

	//! Construct an ART from a vector of sorted keys
	bool ConstructFromSorted(idx_t count, vector &keys, Vector &row_identifiers);

	//! Search equal values and fetches the row IDs
	bool SearchEqual(ARTKey &key, idx_t max_count, vector &result_ids);
	//! Search equal values used for joins that do not need to fetch data
	//! (the result is reported through the result_size reference parameter)
	void SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size);

	//! Serializes the index and returns the pair of block_id offset positions
	BlockPointer Serialize(MetaBlockWriter &writer) override;

	//! Merge another index into this index. The lock obtained from InitializeLock must be held, and the other
	//! index must also be locked during the merge
	bool MergeIndexes(IndexLock &state, Index &other_index) override;

	//! Traverses an ART and vacuums the qualifying nodes. The lock obtained from InitializeLock must be held
	void Vacuum(IndexLock &state) override;

	//! Generate ART keys for an input chunk
	static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector &keys);

	//! Generate a string containing all the expressions and their respective values that violate a constraint
	string GenerateErrorKeyName(DataChunk &input, idx_t row);
	//! Generate the matching error message for a constraint violation
	string GenerateConstraintErrorMessage(VerifyExistenceType verify_type, const string &key_name);
	//! Performs constraint checking for a chunk of input data
	void CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) override;

	//! Returns the string representation of the ART, or only traverses and verifies the index
	string VerifyAndToString(IndexLock &state, const bool only_verify) override;

	//! Find the node with a matching key, or return nullptr if not found
	//! NOTE(review): the return type is Node (by value), not a pointer - how "not found" is represented should be
	//! confirmed against the definition.
	Node Lookup(Node node, const ARTKey &key, idx_t depth);

private:
	//!
//! Insert a row ID into a leaf
	bool InsertToLeaf(Node &leaf_node, const row_t &row_id);
	//! Insert a key into the tree
	bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
	//! Erase a key from the tree (if a leaf has more than one value) or erase the leaf itself
	void Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);

	//! Returns all row IDs belonging to a key greater (or equal) than the search key
	bool SearchGreater(ARTIndexScanState &state, ARTKey &key, bool inclusive, idx_t max_count, vector &result_ids);
	//! Returns all row IDs belonging to a key less (or equal) than the upper_bound
	bool SearchLess(ARTIndexScanState &state, ARTKey &upper_bound, bool inclusive, idx_t max_count, vector &result_ids);
	//! Returns all row IDs belonging to a key within the range of lower_bound and upper_bound
	bool SearchCloseRange(ARTIndexScanState &state, ARTKey &lower_bound, ARTKey &upper_bound, bool left_inclusive, bool right_inclusive, idx_t max_count, vector &result_ids);

	//! Initializes a merge operation by returning a set containing the buffer count of each fixed-size allocator
	//! NOTE(review): declared void - the result is presumably written into `flags`; confirm against the definition.
	void InitializeMerge(ARTFlags &flags);

	//! Initializes a vacuum operation by calling the initialize operation of the respective
	//! node allocator, and returns a vector containing either true, if the allocator at
	//! the respective position qualifies, or false, if not
	//! NOTE(review): declared void - the result is presumably written into `flags`; confirm against the definition.
	void InitializeVacuum(ARTFlags &flags);
	//! Finalizes a vacuum operation by calling the finalize operation of all qualifying
	//! fixed size allocators
	void FinalizeVacuum(const ARTFlags &flags);

	//! Internal function to return the string representation of the ART,
	//!
//! or only traverses and verifies the index
	string VerifyAndToStringInternal(const bool only_verify);
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/catalog/catalog_entry/dschema_catalog_entry.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! A schema in the catalog
//! Keeps one CatalogSet per kind of entry (tables, indexes, functions, sequences, collations, types, ...)
class DuckSchemaEntry : public SchemaCatalogEntry {
public:
	DuckSchemaEntry(Catalog &catalog, string name, bool is_internal);

private:
	//! The catalog set holding the tables
	CatalogSet tables;
	//! The catalog set holding the indexes
	CatalogSet indexes;
	//! The catalog set holding the table functions
	CatalogSet table_functions;
	//! The catalog set holding the copy functions
	CatalogSet copy_functions;
	//! The catalog set holding the pragma functions
	CatalogSet pragma_functions;
	//! The catalog set holding the scalar and aggregate functions
	CatalogSet functions;
	//! The catalog set holding the sequences
	CatalogSet sequences;
	//! The catalog set holding the collations
	CatalogSet collations;
	//!
//! The catalog set holding the types
	CatalogSet types;

public:
	//! Adds an entry to this schema; on_conflict selects the OnCreateConflict behaviour
	// NOTE(review): template arguments of optional_ptr / unique_ptr / std::function are missing in this copy.
	optional_ptr AddEntry(CatalogTransaction transaction, unique_ptr entry, OnCreateConflict on_conflict);
	//! Variant of AddEntry that additionally takes an explicit dependency list
	optional_ptr AddEntryInternal(CatalogTransaction transaction, unique_ptr entry, OnCreateConflict on_conflict, DependencyList dependencies);

	// Create* overrides, one per kind of catalog entry
	optional_ptr CreateTable(CatalogTransaction transaction, BoundCreateTableInfo &info) override;
	optional_ptr CreateFunction(CatalogTransaction transaction, CreateFunctionInfo &info) override;
	optional_ptr CreateIndex(ClientContext &context, CreateIndexInfo &info, TableCatalogEntry &table) override;
	optional_ptr CreateView(CatalogTransaction transaction, CreateViewInfo &info) override;
	optional_ptr CreateSequence(CatalogTransaction transaction, CreateSequenceInfo &info) override;
	optional_ptr CreateTableFunction(CatalogTransaction transaction, CreateTableFunctionInfo &info) override;
	optional_ptr CreateCopyFunction(CatalogTransaction transaction, CreateCopyFunctionInfo &info) override;
	optional_ptr CreatePragmaFunction(CatalogTransaction transaction, CreatePragmaFunctionInfo &info) override;
	optional_ptr CreateCollation(CatalogTransaction transaction, CreateCollationInfo &info) override;
	optional_ptr CreateType(CatalogTransaction transaction, CreateTypeInfo &info) override;

	void Alter(ClientContext &context, AlterInfo &info) override;
	//! Invokes `callback` while scanning the entries of the given catalog type
	void Scan(ClientContext &context, CatalogType type, const std::function &callback) override;
	//! Overload of Scan that does not take a client context
	void Scan(CatalogType type, const std::function &callback) override;
	void DropEntry(ClientContext &context, DropInfo &info) override;
	optional_ptr GetEntry(CatalogTransaction transaction, CatalogType type, const string &name) override;
	SimilarCatalogEntry GetSimilarEntry(CatalogTransaction transaction, CatalogType type, const string &name) override;

	void Verify(Catalog &catalog) override;

private:
	//!
//! Get the catalog set for the specified type
	CatalogSet &GetCatalogSet(CatalogType type);
};
} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/catalog/default/default_functions.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
class SchemaCatalogEntry;

//! Plain description of a built-in macro: schema, name, a fixed array of 8 parameter-name slots, and the macro text
//! NOTE(review): how unused parameter slots are marked (presumably nullptr) is not visible here - confirm.
struct DefaultMacro {
	const char *schema;
	const char *name;
	const char *parameters[8];
	const char *macro;
};

//! DefaultGenerator for functions of a schema; also exposes static helpers that build macro infos from a
//! DefaultMacro
class DefaultFunctionGenerator : public DefaultGenerator {
public:
	DefaultFunctionGenerator(Catalog &catalog, SchemaCatalogEntry &schema);

	//! The schema this generator was constructed for
	SchemaCatalogEntry &schema;

	DUCKDB_API static unique_ptr CreateInternalMacroInfo(DefaultMacro &default_macro);
	DUCKDB_API static unique_ptr CreateInternalTableMacroInfo(DefaultMacro &default_macro);

public:
	unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override;
	vector GetDefaultEntries() override;

private:
	//! Private overload that additionally takes the macro function object
	static unique_ptr CreateInternalTableMacroInfo(DefaultMacro &default_macro, unique_ptr function);
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/catalog/default/default_views.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
class SchemaCatalogEntry;

//! DefaultGenerator for views of a schema
class DefaultViewGenerator : public DefaultGenerator {
public:
	DefaultViewGenerator(Catalog &catalog, SchemaCatalogEntry &schema);

	//! The schema this generator was constructed for
	SchemaCatalogEntry &schema;

public:
	unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override;
	vector GetDefaultEntries() override;
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/catalog/catalog_entry/macro_catalog_entry.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//!
//! A macro function in the catalog
//! (this entry is the table-macro variant: Type is CatalogType::TABLE_MACRO_ENTRY)
class TableMacroCatalogEntry : public MacroCatalogEntry {
public:
	static constexpr const CatalogType Type = CatalogType::TABLE_MACRO_ENTRY;
	static constexpr const char *Name = "table macro function";

public:
	TableMacroCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateMacroInfo &info);
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/catalog/catalog_entry/dtable_catalog_entry.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! A table catalog entry
class DuckTableEntry : public TableCatalogEntry {
public:
	//! Create a TableCatalogEntry and initialize storage for it
	//! inherited_storage defaults to nullptr
	DuckTableEntry(Catalog &catalog, SchemaCatalogEntry &schema, BoundCreateTableInfo &info, std::shared_ptr inherited_storage = nullptr);

public:
	unique_ptr AlterEntry(ClientContext &context, AlterInfo &info) override;
	void UndoAlter(ClientContext &context, AlterInfo &info) override;
	//! Returns the underlying storage of the table
	DataTable &GetStorage() override;
	//! Returns a list of the bound constraints of the table
	const vector> &GetBoundConstraints() override;

	//!
//! Get statistics of a column (physical or virtual) within the table
	unique_ptr GetStatistics(ClientContext &context, column_t column_id) override;

	unique_ptr Copy(ClientContext &context) const override;

	void SetAsRoot() override;

	void CommitAlter(string &column_name);
	void CommitDrop();

	TableFunction GetScanFunction(ClientContext &context, unique_ptr &bind_data) override;

	vector GetColumnSegmentInfo() override;

	TableStorageInfo GetStorageInfo(ClientContext &context) override;

	//! Always true for this entry type
	bool IsDuckTable() const override {
		return true;
	}

private:
	// Private helpers, one per kind of alter info
	// NOTE(review): the unique_ptr return types have lost their template arguments in this copy.
	unique_ptr RenameColumn(ClientContext &context, RenameColumnInfo &info);
	unique_ptr AddColumn(ClientContext &context, AddColumnInfo &info);
	unique_ptr RemoveColumn(ClientContext &context, RemoveColumnInfo &info);
	unique_ptr SetDefault(ClientContext &context, SetDefaultInfo &info);
	unique_ptr ChangeColumnType(ClientContext &context, ChangeColumnTypeInfo &info);
	unique_ptr SetNotNull(ClientContext &context, SetNotNullInfo &info);
	unique_ptr DropNotNull(ClientContext &context, DropNotNullInfo &info);
	unique_ptr AddForeignKeyConstraint(ClientContext &context, AlterForeignKeyInfo &info);
	unique_ptr DropForeignKeyConstraint(ClientContext &context, AlterForeignKeyInfo &info);

	void UpdateConstraintsOnColumnDrop(const LogicalIndex &removed_index, const vector &adjusted_indices, const RemoveColumnInfo &info, CreateTableInfo &create_info, bool is_generated);

private:
	//! A reference to the underlying storage unit used for this table
	std::shared_ptr storage;
	//! A list of constraints that are part of this table
	vector> bound_constraints;
	//!
//! Manages dependencies of the individual columns of the table
	ColumnDependencyManager column_dependency_manager;
};
} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/planner/constraints/bound_foreign_key_constraint.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! Bound FOREIGN KEY constraint: the ForeignKeyInfo plus the primary/foreign key columns stored as sets
class BoundForeignKeyConstraint : public BoundConstraint {
public:
	static constexpr const ConstraintType TYPE = ConstraintType::FOREIGN_KEY;

public:
	//! Takes all three arguments by value and moves them into the members.
	//! The parameters carry a `_p` suffix, so the DEBUG checks in the body refer to the members.
	BoundForeignKeyConstraint(ForeignKeyInfo info_p, physical_index_set_t pk_key_set_p, physical_index_set_t fk_key_set_p)
	    : BoundConstraint(ConstraintType::FOREIGN_KEY), info(std::move(info_p)), pk_key_set(std::move(pk_key_set_p)), fk_key_set(std::move(fk_key_set_p)) {
#ifdef DEBUG
		// every key listed in info must be present in the matching set, and the sizes must agree
		D_ASSERT(info.pk_keys.size() == pk_key_set.size());
		for (auto &key : info.pk_keys) {
			D_ASSERT(pk_key_set.find(key) != pk_key_set.end());
		}
		D_ASSERT(info.fk_keys.size() == fk_key_set.size());
		for (auto &key : info.fk_keys) {
			D_ASSERT(fk_key_set.find(key) != fk_key_set.end());
		}
#endif
	}

	//! The foreign key info; holds the pk_keys / fk_keys vectors checked above
	ForeignKeyInfo info;
	//! The same keys but stored as an unordered set
	physical_index_set_t pk_key_set;
	//! The same keys but stored as an unordered set
	physical_index_set_t fk_key_set;
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/parser/constraints/foreign_key_constraint.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! FOREIGN KEY constraint (TYPE is ConstraintType::FOREIGN_KEY)
class ForeignKeyConstraint : public Constraint {
public:
	static constexpr const ConstraintType TYPE = ConstraintType::FOREIGN_KEY;

public:
	DUCKDB_API ForeignKeyConstraint(vector pk_columns, vector fk_columns, ForeignKeyInfo info);

	//! The set of main key table's columns
	vector pk_columns;
	//!
//! The set of foreign key table's columns
	vector fk_columns;
	ForeignKeyInfo info;

public:
	DUCKDB_API string ToString() const override;

	DUCKDB_API unique_ptr Copy() const override;

	//! Serialize to a stand-alone binary blob
	DUCKDB_API void Serialize(FieldWriter &writer) const override;
	//! Deserializes a ParsedConstraint
	DUCKDB_API static unique_ptr Deserialize(FieldReader &source);
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/planner/constraints/bound_check_constraint.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! The CheckConstraint contains an expression that must evaluate to TRUE for
//! every row in a table
class BoundCheckConstraint : public BoundConstraint {
public:
	static constexpr const ConstraintType TYPE = ConstraintType::CHECK;

public:
	//! Only tags the base with ConstraintType::CHECK; `expression` and `bound_columns` are not set here
	BoundCheckConstraint() : BoundConstraint(ConstraintType::CHECK) {
	}

	//! The expression
	unique_ptr expression;
	//! The columns used by the CHECK constraint
	physical_index_set_t bound_columns;
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/planner/constraints/bound_not_null_constraint.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! Bound NOT NULL constraint on a single physical column
class BoundNotNullConstraint : public BoundConstraint {
public:
	static constexpr const ConstraintType TYPE = ConstraintType::NOT_NULL;

public:
	explicit BoundNotNullConstraint(PhysicalIndex index) : BoundConstraint(ConstraintType::NOT_NULL), index(index) {
	}

	//!
Column index this constraint pertains to PhysicalIndex index; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/constraints/bound_unique_constraint.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class BoundUniqueConstraint : public BoundConstraint { public: static constexpr const ConstraintType TYPE = ConstraintType::UNIQUE; public: BoundUniqueConstraint(vector keys, logical_index_set_t key_set, bool is_primary_key) : BoundConstraint(ConstraintType::UNIQUE), keys(std::move(keys)), key_set(std::move(key_set)), is_primary_key(is_primary_key) { #ifdef DEBUG D_ASSERT(keys.size() == key_set.size()); for (auto &key : keys) { D_ASSERT(key_set.find(key) != key_set.end()); } #endif } //! The keys that define the unique constraint vector keys; //! The same keys but stored as an unordered set logical_index_set_t key_set; //! Whether or not the unique constraint is a primary key bool is_primary_key; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/expression/bound_reference_expression.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! A BoundReferenceExpression represents a physical index into a DataChunk class BoundReferenceExpression : public Expression { public: static constexpr const ExpressionClass TYPE = ExpressionClass::BOUND_REF; public: BoundReferenceExpression(string alias, LogicalType type, idx_t index); BoundReferenceExpression(LogicalType type, storage_t index); //! 
//! Index used to access data in the chunks
	storage_t index;

public:
	//! Always false for this expression type
	bool IsScalar() const override {
		return false;
	}
	//! Always false for this expression type
	bool IsFoldable() const override {
		return false;
	}

	string ToString() const override;

	hash_t Hash() const override;
	bool Equals(const BaseExpression &other) const override;

	unique_ptr Copy() override;

	void Serialize(FieldWriter &writer) const override;
	static unique_ptr Deserialize(ExpressionDeserializationState &state, FieldReader &reader);
};
} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/planner/expression_binder/alter_binder.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
class TableCatalogEntry;

//! The ALTER binder is responsible for binding an expression within alter statements
class AlterBinder : public ExpressionBinder {
public:
	AlterBinder(Binder &binder, ClientContext &context, TableCatalogEntry &table, vector &bound_columns, LogicalType target_type);

	//! The table passed to the constructor (held by reference)
	TableCatalogEntry &table;
	//! Caller-owned vector held by reference
	//! (presumably filled with the columns referenced while binding - confirm against the definition)
	vector &bound_columns;

protected:
	BindResult BindExpression(unique_ptr &expr_ptr, idx_t depth, bool root_expression = false) override;

	BindResult BindColumn(ColumnRefExpression &expr);

	string UnsupportedAggregateMessage() override;
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/planner/filter/null_filter.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! Table filter tagged TableFilterType::IS_NULL
class IsNullFilter : public TableFilter {
public:
	static constexpr const TableFilterType TYPE = TableFilterType::IS_NULL;

public:
	IsNullFilter();

public:
	FilterPropagateResult CheckStatistics(BaseStatistics &stats) override;
	string ToString(const string &column_name) override;
	void Serialize(FieldWriter &writer) const override;
	static unique_ptr Deserialize(FieldReader &source);
};

//! Table filter tagged TableFilterType::IS_NOT_NULL; same interface as IsNullFilter
class IsNotNullFilter : public TableFilter {
public:
	static constexpr const TableFilterType TYPE = TableFilterType::IS_NOT_NULL;

public:
	IsNotNullFilter();

public:
	FilterPropagateResult CheckStatistics(BaseStatistics &stats) override;
	string ToString(const string &column_name) override;
	void Serialize(FieldWriter &writer) const override;
	static unique_ptr Deserialize(FieldReader &source);
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/storage/storage_manager.hpp
//
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/storage/table_io_manager.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
class BlockManager;
class DataTable;

//! Abstract interface (pure virtual accessors) giving access to the block managers used by a table
class TableIOManager {
public:
	virtual ~TableIOManager() {
	}

	//! Obtains a reference to the TableIOManager of a specific table
	static TableIOManager &Get(DataTable &table);

	//! The block manager used for managing index data
	virtual BlockManager &GetIndexBlockManager() = 0;

	//!
The block manager used for storing row group data virtual BlockManager &GetBlockManagerForRowData() = 0; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/write_ahead_log.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/wal_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class WALType : uint8_t { INVALID = 0, // ----------------------------- // Catalog // ----------------------------- CREATE_TABLE = 1, DROP_TABLE = 2, CREATE_SCHEMA = 3, DROP_SCHEMA = 4, CREATE_VIEW = 5, DROP_VIEW = 6, CREATE_SEQUENCE = 8, DROP_SEQUENCE = 9, SEQUENCE_VALUE = 10, CREATE_MACRO = 11, DROP_MACRO = 12, CREATE_TYPE = 13, DROP_TYPE = 14, ALTER_INFO = 20, CREATE_TABLE_MACRO = 21, DROP_TABLE_MACRO = 22, CREATE_INDEX = 23, DROP_INDEX = 24, // ----------------------------- // Data // ----------------------------- USE_TABLE = 25, INSERT_TUPLE = 26, DELETE_TUPLE = 27, UPDATE_TUPLE = 28, // ----------------------------- // Flush // ----------------------------- CHECKPOINT = 99, WAL_FLUSH = 100 }; } namespace duckdb { struct AlterInfo; class AttachedDatabase; class BufferedSerializer; class Catalog; class DatabaseInstance; class SchemaCatalogEntry; class SequenceCatalogEntry; class ScalarMacroCatalogEntry; class ViewCatalogEntry; class TypeCatalogEntry; class TableCatalogEntry; class Transaction; class TransactionManager; class ReplayState { public: ReplayState(AttachedDatabase &db, ClientContext &context, Deserializer &source) : db(db), context(context), catalog(db.GetCatalog()), source(source), deserialize_only(false), checkpoint_id(INVALID_BLOCK) { } AttachedDatabase &db; ClientContext &context; Catalog &catalog; Deserializer &source; optional_ptr current_table; bool 
deserialize_only; block_id_t checkpoint_id; public: void ReplayEntry(WALType entry_type); protected: virtual void ReplayCreateTable(); void ReplayDropTable(); void ReplayAlter(); void ReplayCreateView(); void ReplayDropView(); void ReplayCreateSchema(); void ReplayDropSchema(); void ReplayCreateType(); void ReplayDropType(); void ReplayCreateSequence(); void ReplayDropSequence(); void ReplaySequenceValue(); void ReplayCreateMacro(); void ReplayDropMacro(); void ReplayCreateTableMacro(); void ReplayDropTableMacro(); void ReplayCreateIndex(); void ReplayDropIndex(); void ReplayUseTable(); void ReplayInsert(); void ReplayDelete(); void ReplayUpdate(); void ReplayCheckpoint(); }; //! The WriteAheadLog (WAL) is a log that is used to provide durability. Prior //! to committing a transaction it writes the changes the transaction made to //! the database to the log, which can then be replayed upon startup in case the //! server crashes or is shut down. class WriteAheadLog { public: //! Initialize the WAL in the specified directory explicit WriteAheadLog(AttachedDatabase &database, const string &path); virtual ~WriteAheadLog(); //! Skip writing to the WAL bool skip_writing; public: //! Replay the WAL static bool Replay(AttachedDatabase &database, string &path); //! Returns the current size of the WAL in bytes int64_t GetWALSize(); //! 
Gets the total bytes written to the WAL since startup idx_t GetTotalWritten(); virtual void WriteCreateTable(const TableCatalogEntry &entry); void WriteDropTable(const TableCatalogEntry &entry); void WriteCreateSchema(const SchemaCatalogEntry &entry); void WriteDropSchema(const SchemaCatalogEntry &entry); void WriteCreateView(const ViewCatalogEntry &entry); void WriteDropView(const ViewCatalogEntry &entry); void WriteCreateSequence(const SequenceCatalogEntry &entry); void WriteDropSequence(const SequenceCatalogEntry &entry); void WriteSequenceValue(const SequenceCatalogEntry &entry, SequenceValue val); void WriteCreateMacro(const ScalarMacroCatalogEntry &entry); void WriteDropMacro(const ScalarMacroCatalogEntry &entry); void WriteCreateTableMacro(const TableMacroCatalogEntry &entry); void WriteDropTableMacro(const TableMacroCatalogEntry &entry); void WriteCreateIndex(const IndexCatalogEntry &entry); void WriteDropIndex(const IndexCatalogEntry &entry); void WriteCreateType(const TypeCatalogEntry &entry); void WriteDropType(const TypeCatalogEntry &entry); //! Sets the table used for subsequent insert/delete/update commands void WriteSetTable(string &schema, string &table); void WriteAlter(data_ptr_t ptr, idx_t data_size); void WriteInsert(DataChunk &chunk); void WriteDelete(DataChunk &chunk); //! Write a single (sub-) column update to the WAL. Chunk must be a pair of (COL, ROW_ID). //! The column_path vector is a *path* towards a column within the table //! i.e. if we have a table with a single column S STRUCT(A INT, B INT) //! and we update the validity mask of "S.B" //! the column path is: //! 0 (first column of table) //! -> 1 (second subcolumn of struct) //! -> 0 (first subcolumn of INT) void WriteUpdate(DataChunk &chunk, const vector &column_path); //! Truncate the WAL to a previous size, and clear anything currently set in the writer void Truncate(int64_t size); //! Delete the WAL file on disk. The WAL should not be used after this point. 
void Delete(); void Flush(); void WriteCheckpoint(block_id_t meta_block); protected: AttachedDatabase &database; unique_ptr writer; string wal_path; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/database_size.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct DatabaseSize { idx_t total_blocks = 0; idx_t block_size = 0; idx_t free_blocks = 0; idx_t used_blocks = 0; idx_t bytes = 0; idx_t wal_size = 0; }; } // namespace duckdb namespace duckdb { class BlockManager; class Catalog; class CheckpointWriter; class DatabaseInstance; class TransactionManager; class TableCatalogEntry; class StorageCommitState { public: // Destruction of this object, without prior call to FlushCommit, // will roll back the committed changes. virtual ~StorageCommitState() { } // Make the commit persistent virtual void FlushCommit() = 0; }; //! StorageManager is responsible for managing the physical storage of the //! database on disk class StorageManager { public: StorageManager(AttachedDatabase &db, string path, bool read_only); virtual ~StorageManager(); public: static StorageManager &Get(AttachedDatabase &db); static StorageManager &Get(Catalog &catalog); //! Initialize a database or load an existing database from the given path void Initialize(); DatabaseInstance &GetDatabase(); AttachedDatabase &GetAttached() { return db; } //! 
Get the WAL of the StorageManager, returns nullptr if in-memory optional_ptr GetWriteAheadLog() { return wal.get(); } string GetDBPath() { return path; } bool InMemory(); virtual bool AutomaticCheckpoint(idx_t estimated_wal_bytes) = 0; virtual unique_ptr GenStorageCommitState(Transaction &transaction, bool checkpoint) = 0; virtual bool IsCheckpointClean(block_id_t checkpoint_id) = 0; virtual void CreateCheckpoint(bool delete_wal = false, bool force_checkpoint = false) = 0; virtual DatabaseSize GetDatabaseSize() = 0; virtual shared_ptr GetTableIOManager(BoundCreateTableInfo *info) = 0; protected: virtual void LoadDatabase() = 0; protected: //! The database this storagemanager belongs to AttachedDatabase &db; //! The path of the database string path; //! The WriteAheadLog of the storage manager unique_ptr wal; //! Whether or not the database is opened in read-only mode bool read_only; public: template TARGET &Cast() { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } template const TARGET &Cast() const { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } }; //! Stores database in a single file. class SingleFileStorageManager : public StorageManager { public: SingleFileStorageManager(AttachedDatabase &db, string path, bool read_only); //! The BlockManager to read/store meta information and data in blocks unique_ptr block_manager; //! 
TableIoManager unique_ptr table_io_manager; public: bool AutomaticCheckpoint(idx_t estimated_wal_bytes) override; unique_ptr GenStorageCommitState(Transaction &transaction, bool checkpoint) override; bool IsCheckpointClean(block_id_t checkpoint_id) override; void CreateCheckpoint(bool delete_wal, bool force_checkpoint) override; DatabaseSize GetDatabaseSize() override; shared_ptr GetTableIOManager(BoundCreateTableInfo *info) override; protected: void LoadDatabase() override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_expression_iterator.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { class ParsedExpressionIterator { public: static void EnumerateChildren(const ParsedExpression &expression, const std::function &callback); static void EnumerateChildren(ParsedExpression &expr, const std::function &callback); static void EnumerateChildren(ParsedExpression &expr, const std::function &child)> &callback); static void EnumerateTableRefChildren(TableRef &ref, const std::function &child)> &callback); static void EnumerateQueryNodeChildren(QueryNode &node, const std::function &child)> &callback); static void EnumerateQueryNodeModifiers(QueryNode &node, const std::function &child)> &callback); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/constraints/check_constraint.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! The CheckConstraint contains an expression that must evaluate to TRUE for //! 
every row in a table class CheckConstraint : public Constraint { public: static constexpr const ConstraintType TYPE = ConstraintType::CHECK; public: DUCKDB_API explicit CheckConstraint(unique_ptr expression); unique_ptr expression; public: DUCKDB_API string ToString() const override; DUCKDB_API unique_ptr Copy() const override; DUCKDB_API void Serialize(FieldWriter &writer) const override; DUCKDB_API static unique_ptr Deserialize(FieldReader &source); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/constraints/not_null_constraint.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class NotNullConstraint : public Constraint { public: static constexpr const ConstraintType TYPE = ConstraintType::NOT_NULL; public: DUCKDB_API explicit NotNullConstraint(LogicalIndex index); DUCKDB_API ~NotNullConstraint() override; //! Column index this constraint pertains to LogicalIndex index; public: DUCKDB_API string ToString() const override; DUCKDB_API unique_ptr Copy() const override; //! Serialize to a stand-alone binary blob DUCKDB_API void Serialize(FieldWriter &writer) const override; //! Deserializes a NotNullConstraint DUCKDB_API static unique_ptr Deserialize(FieldReader &source); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/constraints/unique_constraint.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class UniqueConstraint : public Constraint { public: static constexpr const ConstraintType TYPE = ConstraintType::UNIQUE; public: DUCKDB_API UniqueConstraint(LogicalIndex index, bool is_primary_key); DUCKDB_API UniqueConstraint(vector columns, bool is_primary_key); //! The index of the column for which this constraint holds. Only used when the constraint relates to a single //! 
column, equal to DConstants::INVALID_INDEX if not used LogicalIndex index; //! The set of columns for which this constraint holds by name. Only used when the index field is not used. vector columns; //! Whether or not this is a PRIMARY KEY constraint, or a UNIQUE constraint. bool is_primary_key; public: DUCKDB_API string ToString() const override; DUCKDB_API unique_ptr Copy() const override; //! Serialize to a stand-alone binary blob DUCKDB_API void Serialize(FieldWriter &writer) const override; //! Deserializes a ParsedConstraint DUCKDB_API static unique_ptr Deserialize(FieldReader &source); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/table/table_scan.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class DuckTableEntry; class TableCatalogEntry; struct TableScanBindData : public TableFunctionData { explicit TableScanBindData(DuckTableEntry &table) : table(table), is_index_scan(false), is_create_index(false) { } //! The table to scan DuckTableEntry &table; //! Whether or not the table scan is an index scan bool is_index_scan; //! Whether or not the table scan is for index creation bool is_create_index; //! The row ids to fetch (in case of an index scan) vector result_ids; public: bool Equals(const FunctionData &other_p) const override { auto &other = (const TableScanBindData &)other_p; return &other.table == &table && result_ids == other.result_ids; } }; //! The table scan function represents a sequential scan over one of DuckDB's base tables. 
struct TableScanFunction { static void RegisterFunction(BuiltinFunctions &set); static TableFunction GetFunction(); static TableFunction GetIndexScanFunction(); static optional_ptr GetTableEntry(const TableFunction &function, const optional_ptr bind_data); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/table_storage_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct ColumnSegmentInfo { idx_t row_group_index; idx_t column_id; string column_path; idx_t segment_idx; string segment_type; idx_t segment_start; idx_t segment_count; string compression_type; string segment_stats; bool has_updates; bool persistent; block_id_t block_id; idx_t block_offset; }; struct IndexInfo { bool is_unique; bool is_primary; bool is_foreign; unordered_set column_set; }; class TableStorageInfo { public: //! The (estimated) cardinality of the table idx_t cardinality = DConstants::INVALID_INDEX; //! Info of the indexes of a table vector index_info; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/operator/logical_get.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! LogicalGet represents a scan operation from a data source class LogicalGet : public LogicalOperator { public: static constexpr const LogicalOperatorType TYPE = LogicalOperatorType::LOGICAL_GET; public: LogicalGet(idx_t table_index, TableFunction function, unique_ptr bind_data, vector returned_types, vector returned_names); //! The table index in the current bind context idx_t table_index; //! The function that is called TableFunction function; //! The bind data of the function unique_ptr bind_data; //! The types of ALL columns that can be returned by the table function vector returned_types; //! 
The names of ALL columns that can be returned by the table function vector names; //! Bound column IDs vector column_ids; //! Columns that are used outside of the scan vector projection_ids; //! Filters pushed down for table scan TableFilterSet table_filters; //! The set of input parameters for the table function vector parameters; //! The set of named input parameters for the table function named_parameter_map_t named_parameters; //! The set of named input table types for the table-in table-out function vector input_table_types; //! The set of named input table names for the table-in table-out function vector input_table_names; //! For a table-in-out function, the set of projected input columns vector projected_input; string GetName() const override; string ParamsToString() const override; //! Returns the underlying table that is being scanned, or nullptr if there is none optional_ptr GetTable() const; public: vector GetColumnBindings() override; idx_t EstimateCardinality(ClientContext &context) override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); vector GetTableIndex() const override; //! Skips the serialization check in VerifyPlan bool SupportSerialization() const override { return function.verify_serialization; }; protected: void ResolveTypes() override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/operator/logical_projection.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! 
LogicalProjection represents the projection list in a SELECT clause class LogicalProjection : public LogicalOperator { public: static constexpr const LogicalOperatorType TYPE = LogicalOperatorType::LOGICAL_PROJECTION; public: LogicalProjection(idx_t table_index, vector> select_list); idx_t table_index; public: vector GetColumnBindings() override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); vector GetTableIndex() const override; string GetName() const override; protected: void ResolveTypes() override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/operator/logical_update.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class TableCatalogEntry; class LogicalUpdate : public LogicalOperator { public: static constexpr const LogicalOperatorType TYPE = LogicalOperatorType::LOGICAL_UPDATE; public: explicit LogicalUpdate(TableCatalogEntry &table); //! The base table to update TableCatalogEntry &table; //! table catalog index idx_t table_index; //! if returning option is used, return the update chunk bool return_chunk; vector columns; vector> bound_defaults; bool update_is_del_and_insert; public: void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); idx_t EstimateCardinality(ClientContext &context) override; string GetName() const override; protected: vector GetColumnBindings() override; void ResolveTypes() override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/scalar_macro_function.hpp // // //===----------------------------------------------------------------------===// //! 
The SelectStatement of the view namespace duckdb { class ScalarMacroFunction : public MacroFunction { public: static constexpr const MacroType TYPE = MacroType::SCALAR_MACRO; public: explicit ScalarMacroFunction(unique_ptr expression); ScalarMacroFunction(void); //! The macro expression unique_ptr expression; public: unique_ptr Copy() const override; string ToSQL(const string &schema, const string &name) const override; static unique_ptr Deserialize(FieldReader &reader); protected: void SerializeInternal(FieldWriter &writer) const override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/alter_scalar_function_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Alter Scalar Function //===--------------------------------------------------------------------===// enum class AlterScalarFunctionType : uint8_t { INVALID = 0, ADD_FUNCTION_OVERLOADS = 1 }; struct AlterScalarFunctionInfo : public AlterInfo { AlterScalarFunctionInfo(AlterScalarFunctionType type, AlterEntryData data); virtual ~AlterScalarFunctionInfo() override; AlterScalarFunctionType alter_scalar_function_type; public: CatalogType GetCatalogType() const override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(FieldReader &reader); }; //===--------------------------------------------------------------------===// // AddScalarFunctionOverloadInfo //===--------------------------------------------------------------------===// struct AddScalarFunctionOverloadInfo : public AlterScalarFunctionInfo { AddScalarFunctionOverloadInfo(AlterEntryData data, ScalarFunctionSet new_overloads); ~AddScalarFunctionOverloadInfo() override; ScalarFunctionSet new_overloads; public: unique_ptr Copy() const override; }; } // namespace duckdb 
//===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/algorithm.hpp // // //===----------------------------------------------------------------------===// #include #include //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/dependency_manager.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/dependency.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class CatalogEntry; enum class DependencyType { DEPENDENCY_REGULAR = 0, DEPENDENCY_AUTOMATIC = 1, DEPENDENCY_OWNS = 2, DEPENDENCY_OWNED_BY = 3 }; struct Dependency { Dependency(CatalogEntry &entry, DependencyType dependency_type = DependencyType::DEPENDENCY_REGULAR) : // NOLINT: Allow implicit conversion from `CatalogEntry` entry(entry), dependency_type(dependency_type) { } //! The catalog entry this depends on reference entry; //! The type of dependency DependencyType dependency_type; }; struct DependencyHashFunction { uint64_t operator()(const Dependency &a) const { std::hash hash_func; return hash_func((void *)&a.entry.get()); } }; struct DependencyEquality { bool operator()(const Dependency &a, const Dependency &b) const { return RefersToSameObject(a.entry, b.entry); } }; using dependency_set_t = unordered_set; } // namespace duckdb #include namespace duckdb { class DuckCatalog; class ClientContext; class DependencyList; //! The DependencyManager is in charge of managing dependencies between catalog entries class DependencyManager { friend class CatalogSet; public: explicit DependencyManager(DuckCatalog &catalog); //! Erase the object from the DependencyManager; this should only happen when the object itself is destroyed void EraseObject(CatalogEntry &object); //! 
Scans all dependencies, returning pairs of (object, dependent) void Scan(const std::function &callback); void AddOwnership(CatalogTransaction transaction, CatalogEntry &owner, CatalogEntry &entry); private: DuckCatalog &catalog; //! Map of objects that DEPEND on [object], i.e. [object] can only be deleted when all entries in the dependency map //! are deleted. catalog_entry_map_t dependents_map; //! Map of objects that the source object DEPENDS on, i.e. when any of the entries in the vector perform a CASCADE //! drop then [object] is deleted as well catalog_entry_map_t dependencies_map; private: void AddObject(CatalogTransaction transaction, CatalogEntry &object, DependencyList &dependencies); void DropObject(CatalogTransaction transaction, CatalogEntry &object, bool cascade); void AlterObject(CatalogTransaction transaction, CatalogEntry &old_obj, CatalogEntry &new_obj); void EraseObjectInternal(CatalogEntry &object); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/alter_scalar_function_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Alter Table Function //===--------------------------------------------------------------------===// enum class AlterTableFunctionType : uint8_t { INVALID = 0, ADD_FUNCTION_OVERLOADS = 1 }; struct AlterTableFunctionInfo : public AlterInfo { AlterTableFunctionInfo(AlterTableFunctionType type, AlterEntryData data); virtual ~AlterTableFunctionInfo() override; AlterTableFunctionType alter_table_function_type; public: CatalogType GetCatalogType() const override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(FieldReader &reader); }; //===--------------------------------------------------------------------===// // AddTableFunctionOverloadInfo 
//===--------------------------------------------------------------------===// struct AddTableFunctionOverloadInfo : public AlterTableFunctionInfo { AddTableFunctionOverloadInfo(AlterEntryData data, TableFunctionSet new_overloads); ~AddTableFunctionOverloadInfo() override; TableFunctionSet new_overloads; public: unique_ptr Copy() const override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/dcatalog.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! The Catalog object represents the catalog of the database. class DuckCatalog : public Catalog { public: explicit DuckCatalog(AttachedDatabase &db); ~DuckCatalog(); public: bool IsDuckCatalog() override; void Initialize(bool load_builtin) override; string GetCatalogType() override { return "duckdb"; } DependencyManager &GetDependencyManager() { return *dependency_manager; } mutex &GetWriteLock() { return write_lock; } public: DUCKDB_API optional_ptr CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) override; DUCKDB_API void ScanSchemas(ClientContext &context, std::function callback) override; DUCKDB_API void ScanSchemas(std::function callback); DUCKDB_API optional_ptr GetSchema(CatalogTransaction transaction, const string &schema_name, OnEntryNotFound if_not_found, QueryErrorContext error_context = QueryErrorContext()) override; DUCKDB_API unique_ptr PlanCreateTableAs(ClientContext &context, LogicalCreateTable &op, unique_ptr plan) override; DUCKDB_API unique_ptr PlanInsert(ClientContext &context, LogicalInsert &op, unique_ptr plan) override; DUCKDB_API unique_ptr PlanDelete(ClientContext &context, LogicalDelete &op, unique_ptr plan) override; DUCKDB_API unique_ptr PlanUpdate(ClientContext &context, LogicalUpdate &op, unique_ptr plan) override; DUCKDB_API unique_ptr BindCreateIndex(Binder &binder, CreateStatement &stmt, TableCatalogEntry &table, 
unique_ptr plan) override; DatabaseSize GetDatabaseSize(ClientContext &context) override; DUCKDB_API bool InMemory() override; DUCKDB_API string GetDBPath() override; private: DUCKDB_API void DropSchema(CatalogTransaction transaction, DropInfo &info); DUCKDB_API void DropSchema(ClientContext &context, DropInfo &info) override; optional_ptr CreateSchemaInternal(CatalogTransaction transaction, CreateSchemaInfo &info); void Verify() override; private: //! The DependencyManager manages dependencies between different catalog objects unique_ptr dependency_manager; //! Write lock for the catalog mutex write_lock; //! The catalog set holding the schemas unique_ptr schemas; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/transaction/transaction_manager.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class AttachedDatabase; class ClientContext; class Catalog; struct ClientLockWrapper; class DatabaseInstance; class Transaction; //! The Transaction Manager is responsible for creating and managing //! transactions class TransactionManager { public: explicit TransactionManager(AttachedDatabase &db); virtual ~TransactionManager(); //! Start a new transaction virtual Transaction *StartTransaction(ClientContext &context) = 0; //! Commit the given transaction. Returns a non-empty error message on failure. virtual string CommitTransaction(ClientContext &context, Transaction *transaction) = 0; //! Rollback the given transaction virtual void RollbackTransaction(Transaction *transaction) = 0; virtual void Checkpoint(ClientContext &context, bool force = false) = 0; static TransactionManager &Get(AttachedDatabase &db); virtual bool IsDuckTransactionManager() { return false; } AttachedDatabase &GetDB() { return db; } protected: //! 
The attached database AttachedDatabase &db; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/transaction/duck_transaction.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class DuckTransaction : public Transaction { public: DuckTransaction(TransactionManager &manager, ClientContext &context, transaction_t start_time, transaction_t transaction_id); ~DuckTransaction(); //! The start timestamp of this transaction transaction_t start_time; //! The transaction id of this transaction transaction_t transaction_id; //! The commit id of this transaction, if it has successfully been committed transaction_t commit_id; //! Map of all sequences that were used during the transaction and the value they had in this transaction unordered_map sequence_usage; //! Highest active query when the transaction finished, used for cleaning up transaction_t highest_active_query; public: static DuckTransaction &Get(ClientContext &context, AttachedDatabase &db); static DuckTransaction &Get(ClientContext &context, Catalog &catalog); LocalStorage &GetLocalStorage(); void PushCatalogEntry(CatalogEntry &entry, data_ptr_t extra_data = nullptr, idx_t extra_data_size = 0); //! Commit the current transaction with the given commit identifier. Returns an error message if the transaction //! commit failed, or an empty string if the commit was sucessful string Commit(AttachedDatabase &db, transaction_t commit_id, bool checkpoint) noexcept; //! Returns whether or not a commit of this transaction should trigger an automatic checkpoint bool AutomaticCheckpoint(AttachedDatabase &db); //! Rollback void Rollback() noexcept; //! 
Cleanup the undo buffer void Cleanup(); bool ChangesMade(); void PushDelete(DataTable &table, ChunkVectorInfo *vinfo, row_t rows[], idx_t count, idx_t base_row); void PushAppend(DataTable &table, idx_t row_start, idx_t row_count); UpdateInfo *CreateUpdateInfo(idx_t type_size, idx_t entries); bool IsDuckTransaction() const override { return true; } private: //! The undo buffer is used to store old versions of rows that are updated //! or deleted UndoBuffer undo_buffer; //! The set of uncommitted appends for the transaction unique_ptr storage; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/mapping_value.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct AlterInfo; class ClientContext; struct EntryIndex { EntryIndex() : catalog(nullptr), index(DConstants::INVALID_INDEX) { } EntryIndex(CatalogSet &catalog, idx_t index) : catalog(&catalog), index(index) { auto entry = catalog.entries.find(index); if (entry == catalog.entries.end()) { throw InternalException("EntryIndex - Catalog entry not found in constructor!?"); } catalog.entries[index].reference_count++; } ~EntryIndex() { if (!catalog) { return; } auto entry = catalog->entries.find(index); D_ASSERT(entry != catalog->entries.end()); auto remaining_ref = --entry->second.reference_count; if (remaining_ref == 0) { catalog->entries.erase(index); } catalog = nullptr; } // disable copy constructors EntryIndex(const EntryIndex &other) = delete; EntryIndex &operator=(const EntryIndex &) = delete; //! 
enable move constructors EntryIndex(EntryIndex &&other) noexcept { catalog = nullptr; index = DConstants::INVALID_INDEX; std::swap(catalog, other.catalog); std::swap(index, other.index); } EntryIndex &operator=(EntryIndex &&other) noexcept { std::swap(catalog, other.catalog); std::swap(index, other.index); return *this; } unique_ptr &GetEntry() { auto entry = catalog->entries.find(index); if (entry == catalog->entries.end()) { throw InternalException("EntryIndex - Catalog entry not found!?"); } return entry->second.entry; } idx_t GetIndex() { return index; } EntryIndex Copy() { if (catalog) { return EntryIndex(*catalog, index); } else { return EntryIndex(); } } private: CatalogSet *catalog; idx_t index; }; struct MappingValue { explicit MappingValue(EntryIndex index_p) : index(std::move(index_p)), timestamp(0), deleted(false), parent(nullptr) { } EntryIndex index; transaction_t timestamp; bool deleted; unique_ptr child; MappingValue *parent; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parser.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/simplified_token.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! Simplified tokens are a simplified (dense) representation of the lexer //! 
Used for simple syntax highlighting in the tests enum class SimplifiedTokenType : uint8_t { SIMPLIFIED_TOKEN_IDENTIFIER, SIMPLIFIED_TOKEN_NUMERIC_CONSTANT, SIMPLIFIED_TOKEN_STRING_CONSTANT, SIMPLIFIED_TOKEN_OPERATOR, SIMPLIFIED_TOKEN_KEYWORD, SIMPLIFIED_TOKEN_COMMENT }; struct SimplifiedToken { SimplifiedTokenType type; idx_t start; }; enum class KeywordCategory : uint8_t { KEYWORD_RESERVED, KEYWORD_UNRESERVED, KEYWORD_TYPE_FUNC, KEYWORD_COL_NAME }; struct ParserKeyword { string name; KeywordCategory category; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parser_options.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class ParserExtension; struct ParserOptions { bool preserve_identifier_case = true; bool integer_division = false; idx_t max_expression_depth = 1000; const vector *extensions = nullptr; }; } // namespace duckdb namespace duckdb_libpgquery { struct PGNode; struct PGList; } // namespace duckdb_libpgquery namespace duckdb { //! The parser is responsible for parsing the query and converting it into a set //! of parsed statements. The parsed statements can then be converted into a //! plan and executed. class Parser { public: Parser(ParserOptions options = ParserOptions()); //! The parsed SQL statements from an invocation to ParseQuery. vector> statements; public: //! Attempts to parse a query into a series of SQL statements. Returns //! whether or not the parsing was successful. If the parsing was //! successful, the parsed statements will be stored in the statements //! variable. void ParseQuery(const string &query); //! Tokenize a query, returning the raw tokens together with their locations static vector Tokenize(const string &query); //! Returns true if the given text matches a keyword of the parser static bool IsKeyword(const string &text); //! 
Returns a list of all keywords in the parser static vector KeywordList(); //! Parses a list of expressions (i.e. the list found in a SELECT clause) DUCKDB_API static vector> ParseExpressionList(const string &select_list, ParserOptions options = ParserOptions()); //! Parses a list as found in an ORDER BY expression (i.e. including optional ASCENDING/DESCENDING modifiers) static vector ParseOrderList(const string &select_list, ParserOptions options = ParserOptions()); //! Parses an update list (i.e. the list found in the SET clause of an UPDATE statement) static void ParseUpdateList(const string &update_list, vector &update_columns, vector> &expressions, ParserOptions options = ParserOptions()); //! Parses a VALUES list (i.e. the list of expressions after a VALUES clause) static vector>> ParseValuesList(const string &value_list, ParserOptions options = ParserOptions()); //! Parses a column list (i.e. as found in a CREATE TABLE statement) static ColumnList ParseColumnList(const string &column_list, ParserOptions options = ParserOptions()); static bool StripUnicodeSpaces(const string &query_str, string &new_query); private: ParserOptions options; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/table_macro_function.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class TableMacroFunction : public MacroFunction { public: static constexpr const MacroType TYPE = MacroType::TABLE_MACRO; public: explicit TableMacroFunction(unique_ptr query_node); TableMacroFunction(void); //! 
The main query node unique_ptr query_node; public: unique_ptr Copy() const override; string ToSQL(const string &schema, const string &name) const override; static unique_ptr Deserialize(FieldReader &reader); protected: void SerializeInternal(FieldWriter &writer) const override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/default/builtin_types/types.hpp // // //===----------------------------------------------------------------------===// // This file is generated by scripts/generate_builtin_types.py //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/array.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::array; } namespace duckdb { struct DefaultType { const char *name; LogicalTypeId type; }; using builtin_type_array = std::array; static constexpr const builtin_type_array BUILTIN_TYPES{{ {"decimal", LogicalTypeId::DECIMAL}, {"dec", LogicalTypeId::DECIMAL}, {"numeric", LogicalTypeId::DECIMAL}, {"time", LogicalTypeId::TIME}, {"date", LogicalTypeId::DATE}, {"timestamp", LogicalTypeId::TIMESTAMP}, {"datetime", LogicalTypeId::TIMESTAMP}, {"timestamp_us", LogicalTypeId::TIMESTAMP}, {"timestamp_ms", LogicalTypeId::TIMESTAMP_MS}, {"timestamp_ns", LogicalTypeId::TIMESTAMP_NS}, {"timestamp_s", LogicalTypeId::TIMESTAMP_SEC}, {"timestamptz", LogicalTypeId::TIMESTAMP_TZ}, {"timetz", LogicalTypeId::TIME_TZ}, {"interval", LogicalTypeId::INTERVAL}, {"varchar", LogicalTypeId::VARCHAR}, {"bpchar", LogicalTypeId::VARCHAR}, {"string", LogicalTypeId::VARCHAR}, {"char", LogicalTypeId::VARCHAR}, {"nvarchar", LogicalTypeId::VARCHAR}, {"text", LogicalTypeId::VARCHAR}, {"blob", LogicalTypeId::BLOB}, {"bytea", LogicalTypeId::BLOB}, {"varbinary", LogicalTypeId::BLOB}, {"binary", LogicalTypeId::BLOB}, {"hugeint", LogicalTypeId::HUGEINT}, {"int128", 
LogicalTypeId::HUGEINT}, {"bigint", LogicalTypeId::BIGINT}, {"oid", LogicalTypeId::BIGINT}, {"long", LogicalTypeId::BIGINT}, {"int8", LogicalTypeId::BIGINT}, {"int64", LogicalTypeId::BIGINT}, {"ubigint", LogicalTypeId::UBIGINT}, {"uint64", LogicalTypeId::UBIGINT}, {"integer", LogicalTypeId::INTEGER}, {"int", LogicalTypeId::INTEGER}, {"int4", LogicalTypeId::INTEGER}, {"signed", LogicalTypeId::INTEGER}, {"integral", LogicalTypeId::INTEGER}, {"int32", LogicalTypeId::INTEGER}, {"uinteger", LogicalTypeId::UINTEGER}, {"uint32", LogicalTypeId::UINTEGER}, {"smallint", LogicalTypeId::SMALLINT}, {"int2", LogicalTypeId::SMALLINT}, {"short", LogicalTypeId::SMALLINT}, {"int16", LogicalTypeId::SMALLINT}, {"usmallint", LogicalTypeId::USMALLINT}, {"uint16", LogicalTypeId::USMALLINT}, {"tinyint", LogicalTypeId::TINYINT}, {"int1", LogicalTypeId::TINYINT}, {"utinyint", LogicalTypeId::UTINYINT}, {"uint8", LogicalTypeId::UTINYINT}, {"struct", LogicalTypeId::STRUCT}, {"row", LogicalTypeId::STRUCT}, {"list", LogicalTypeId::LIST}, {"map", LogicalTypeId::MAP}, {"union", LogicalTypeId::UNION}, {"bit", LogicalTypeId::BIT}, {"bitstring", LogicalTypeId::BIT}, {"boolean", LogicalTypeId::BOOLEAN}, {"bool", LogicalTypeId::BOOLEAN}, {"logical", LogicalTypeId::BOOLEAN}, {"uuid", LogicalTypeId::UUID}, {"guid", LogicalTypeId::UUID}, {"enum", LogicalTypeId::ENUM}, {"null", LogicalTypeId::SQLNULL}, {"float", LogicalTypeId::FLOAT}, {"real", LogicalTypeId::FLOAT}, {"float4", LogicalTypeId::FLOAT}, {"double", LogicalTypeId::DOUBLE}, {"float8", LogicalTypeId::DOUBLE} }}; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/core_functions/core_functions.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Catalog; struct CatalogTransaction; struct CoreFunctions { static void RegisterFunctions(Catalog &catalog, CatalogTransaction transaction); }; } // namespace duckdb // 
Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #ifndef DUCKDB_ADBC_INIT #define DUCKDB_ADBC_INIT #ifdef __cplusplus extern "C" { #endif //! We gotta leak the symbols of the init function duckdb_adbc::AdbcStatusCode duckdb_adbc_init(size_t count, struct duckdb_adbc::AdbcDriver *driver, struct duckdb_adbc::AdbcError *error); #ifdef __cplusplus } #endif #endif // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
#ifdef __cplusplus extern "C" { #endif #ifndef ADBC_DRIVER_MANAGER_H #define ADBC_DRIVER_MANAGER_H namespace duckdb_adbc { /// \brief Common entry point for drivers via the driver manager. /// /// The driver manager can fill in default implementations of some /// ADBC functions for drivers. Drivers must implement a minimum level /// of functionality for this to be possible, however, and some /// functions must be implemented by the driver. /// /// \param[in] driver_name An identifier for the driver (e.g. a path to a /// shared library on Linux). /// \param[in] entrypoint An identifier for the entrypoint (e.g. the /// symbol to call for AdbcDriverInitFunc on Linux). /// \param[in] version The ADBC revision to attempt to initialize. /// \param[out] driver The table of function pointers to initialize. /// \param[out] error An optional location to return an error message /// if necessary. ADBC_EXPORT AdbcStatusCode AdbcLoadDriver(const char *driver_name, const char *entrypoint, int version, void *driver, struct AdbcError *error); /// \brief Common entry point for drivers via the driver manager. /// /// The driver manager can fill in default implementations of some /// ADBC functions for drivers. Drivers must implement a minimum level /// of functionality for this to be possible, however, and some /// functions must be implemented by the driver. /// /// \param[in] init_func The entrypoint to call. /// \param[in] version The ADBC revision to attempt to initialize. /// \param[out] driver The table of function pointers to initialize. /// \param[out] error An optional location to return an error message /// if necessary. ADBC_EXPORT AdbcStatusCode AdbcLoadDriverFromInitFunc(AdbcDriverInitFunc init_func, int version, void *driver, struct AdbcError *error); /// \brief Set the AdbcDriverInitFunc to use. /// /// This is an extension to the ADBC API. The driver manager shims /// the AdbcDatabase* functions to allow you to specify the /// driver/entrypoint dynamically. 
This function lets you set the /// entrypoint explicitly, for applications that can dynamically /// load drivers on their own. ADBC_EXPORT AdbcStatusCode AdbcDriverManagerDatabaseSetInitFunc(struct AdbcDatabase *database, AdbcDriverInitFunc init_func, struct AdbcError *error); /// \brief Get a human-friendly description of a status code. ADBC_EXPORT const char *AdbcStatusCodeMessage(AdbcStatusCode code); #endif // ADBC_DRIVER_MANAGER_H #ifdef __cplusplus } #endif } // namespace duckdb_adbc //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/arrow/arrow_appender.hpp // // //===----------------------------------------------------------------------===// struct ArrowSchema; namespace duckdb { struct ArrowAppendData; //! The ArrowAppender class can be used to incrementally construct an arrow array by appending data chunks into it class ArrowAppender { public: DUCKDB_API ArrowAppender(vector types, idx_t initial_capacity, ArrowOptions options); DUCKDB_API ~ArrowAppender(); //! Append a data chunk to the underlying arrow array DUCKDB_API void Append(DataChunk &input, idx_t from, idx_t to, idx_t input_size); //! Returns the underlying arrow array DUCKDB_API ArrowArray Finalize(); private: //! The types of the chunks that will be appended in vector types; //! The root arrow append data vector> root_data; //! 
The total row count that has been appended idx_t row_count = 0; ArrowOptions options; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/arrow/arrow_buffer.hpp // // //===----------------------------------------------------------------------===// struct ArrowSchema; namespace duckdb { struct ArrowBuffer { static constexpr const idx_t MINIMUM_SHRINK_SIZE = 4096; ArrowBuffer() : dataptr(nullptr), count(0), capacity(0) { } ~ArrowBuffer() { if (!dataptr) { return; } free(dataptr); dataptr = nullptr; count = 0; capacity = 0; } // disable copy constructors ArrowBuffer(const ArrowBuffer &other) = delete; ArrowBuffer &operator=(const ArrowBuffer &) = delete; //! enable move constructors ArrowBuffer(ArrowBuffer &&other) noexcept { std::swap(dataptr, other.dataptr); std::swap(count, other.count); std::swap(capacity, other.capacity); } ArrowBuffer &operator=(ArrowBuffer &&other) noexcept { std::swap(dataptr, other.dataptr); std::swap(count, other.count); std::swap(capacity, other.capacity); return *this; } void reserve(idx_t bytes) { // NOLINT auto new_capacity = NextPowerOfTwo(bytes); if (new_capacity <= capacity) { return; } ReserveInternal(new_capacity); } void resize(idx_t bytes) { // NOLINT reserve(bytes); count = bytes; } void resize(idx_t bytes, data_t value) { // NOLINT reserve(bytes); for (idx_t i = count; i < bytes; i++) { dataptr[i] = value; } count = bytes; } idx_t size() { // NOLINT return count; } data_ptr_t data() { // NOLINT return dataptr; } template T *GetData() { return reinterpret_cast(data()); } private: void ReserveInternal(idx_t bytes) { if (dataptr) { dataptr = data_ptr_cast(realloc(dataptr, bytes)); } else { dataptr = data_ptr_cast(malloc(bytes)); } capacity = bytes; } private: data_ptr_t dataptr = nullptr; idx_t count = 0; idx_t capacity = 0; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // 
duckdb/function/table/arrow.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/thread.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::thread; } namespace duckdb { //===--------------------------------------------------------------------===// // Arrow Variable Size Types //===--------------------------------------------------------------------===// enum class ArrowVariableSizeType : uint8_t { FIXED_SIZE = 0, NORMAL = 1, SUPER_SIZE = 2 }; //===--------------------------------------------------------------------===// // Arrow Time/Date Types //===--------------------------------------------------------------------===// enum class ArrowDateTimeType : uint8_t { MILLISECONDS = 0, MICROSECONDS = 1, NANOSECONDS = 2, SECONDS = 3, DAYS = 4, MONTHS = 5, MONTH_DAY_NANO = 6 }; struct ArrowInterval { int32_t months; int32_t days; int64_t nanoseconds; inline bool operator==(const ArrowInterval &rhs) const { return this->days == rhs.days && this->months == rhs.months && this->nanoseconds == rhs.nanoseconds; } }; struct ArrowConvertData { ArrowConvertData(LogicalType type) : dictionary_type(type) {}; ArrowConvertData() {}; //! Hold type of dictionary LogicalType dictionary_type; //! If its a variable size type (e.g., strings, blobs, lists) holds which type it is vector> variable_sz_type; //! 
If this is a date/time holds its precision vector date_time_precision; }; struct ArrowProjectedColumns { unordered_map projection_map; vector columns; }; struct ArrowStreamParameters { ArrowProjectedColumns projected_columns; TableFilterSet *filters; }; typedef unique_ptr (*stream_factory_produce_t)(uintptr_t stream_factory_ptr, ArrowStreamParameters ¶meters); typedef void (*stream_factory_get_schema_t)(uintptr_t stream_factory_ptr, ArrowSchemaWrapper &schema); struct ArrowScanFunctionData : public PyTableFunctionData { ArrowScanFunctionData(stream_factory_produce_t scanner_producer_p, uintptr_t stream_factory_ptr_p) : lines_read(0), stream_factory_ptr(stream_factory_ptr_p), scanner_producer(scanner_producer_p) { } //! This holds the original list type (col_idx, [ArrowListType,size]) unordered_map> arrow_convert_data; vector all_types; atomic lines_read; ArrowSchemaWrapper schema_root; idx_t rows_per_thread; //! Pointer to the scanner factory uintptr_t stream_factory_ptr; //! Pointer to the scanner factory produce stream_factory_produce_t scanner_producer; }; struct ArrowScanLocalState : public LocalTableFunctionState { explicit ArrowScanLocalState(unique_ptr current_chunk) : chunk(current_chunk.release()) { } unique_ptr stream; shared_ptr chunk; idx_t chunk_offset = 0; idx_t batch_index = 0; vector column_ids; //! Store child vectors for Arrow Dictionary Vectors (col-idx,vector) unordered_map> arrow_dictionary_vectors; TableFilterSet *filters = nullptr; //! 
The DataChunk containing all read columns (even filter columns that are immediately removed) DataChunk all_columns; }; struct ArrowScanGlobalState : public GlobalTableFunctionState { unique_ptr stream; mutex main_mutex; idx_t max_threads = 1; idx_t batch_index = 0; bool done = false; vector projection_ids; vector scanned_types; idx_t MaxThreads() const override { return max_threads; } bool CanRemoveFilterColumns() const { return !projection_ids.empty(); } }; struct ArrowTableFunction { public: static void RegisterFunction(BuiltinFunctions &set); public: //! Binds an arrow table static unique_ptr ArrowScanBind(ClientContext &context, TableFunctionBindInput &input, vector &return_types, vector &names); //! Actual conversion from Arrow to DuckDB static void ArrowToDuckDB(ArrowScanLocalState &scan_state, std::unordered_map> &arrow_convert_data, DataChunk &output, idx_t start, bool arrow_scan_is_projected = true); //! Get next scan state static bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, ArrowScanLocalState &state, ArrowScanGlobalState ¶llel_state); //! Initialize Global State static unique_ptr ArrowScanInitGlobal(ClientContext &context, TableFunctionInitInput &input); //! Initialize Local State static unique_ptr ArrowScanInitLocalInternal(ClientContext &context, TableFunctionInitInput &input, GlobalTableFunctionState *global_state); static unique_ptr ArrowScanInitLocal(ExecutionContext &context, TableFunctionInitInput &input, GlobalTableFunctionState *global_state); //! Scan Function static void ArrowScanFunction(ClientContext &context, TableFunctionInput &data, DataChunk &output); protected: //! Defines Maximum Number of Threads static idx_t ArrowScanMaxThreads(ClientContext &context, const FunctionData *bind_data); //! 
Allows parallel Create Table / Insertion static idx_t ArrowGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p, LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state); //! -----Utility Functions:----- //! Gets Arrow Table's Cardinality static unique_ptr ArrowScanCardinality(ClientContext &context, const FunctionData *bind_data); //! Gets the progress on the table scan, used for Progress Bars static double ArrowProgress(ClientContext &context, const FunctionData *bind_data, const GlobalTableFunctionState *global_state); //! Renames repeated columns and case sensitive columns static void RenameArrowColumns(vector &names); //! Helper function to get the DuckDB logical type static LogicalType GetArrowLogicalType(ArrowSchema &schema, std::unordered_map> &arrow_convert_data, idx_t col_idx); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/bit.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! The Bit class is a static class that holds helper functions for the BIT type. class Bit { public: //! Returns the number of bits in the bit string DUCKDB_API static idx_t BitLength(string_t bits); //! Returns the number of set bits in the bit string DUCKDB_API static idx_t BitCount(string_t bits); //! Returns the number of bytes in the bit string DUCKDB_API static idx_t OctetLength(string_t bits); //! Extracts the nth bit from bit string; the first (leftmost) bit is indexed 0 DUCKDB_API static idx_t GetBit(string_t bit_string, idx_t n); //! Sets the nth bit in bit string to newvalue; the first (leftmost) bit is indexed 0 DUCKDB_API static void SetBit(string_t &bit_string, idx_t n, idx_t new_value); //! Returns first starting index of the specified substring within bits, or zero if it's not present. DUCKDB_API static idx_t BitPosition(string_t substring, string_t bits); //! 
Converts bits to a string, writing the output to the designated output string. //! The string needs to have space for at least GetStringSize(bits) bytes. DUCKDB_API static void ToString(string_t bits, char *output); DUCKDB_API static string ToString(string_t str); //! Returns the bit size of a string -> bit conversion DUCKDB_API static bool TryGetBitStringSize(string_t str, idx_t &result_size, string *error_message); //! Convert a string to a bit. This function should ONLY be called after calling GetBitSize, since it does NOT //! perform data validation. DUCKDB_API static void ToBit(string_t str, string_t &output); DUCKDB_API static string ToBit(string_t str); //! Creates a new bitstring of determined length DUCKDB_API static void BitString(const string_t &input, const idx_t &len, string_t &result); DUCKDB_API static void SetEmptyBitString(string_t &target, string_t &input); DUCKDB_API static void SetEmptyBitString(string_t &target, idx_t len); DUCKDB_API static idx_t ComputeBitstringLen(idx_t len); DUCKDB_API static void RightShift(const string_t &bit_string, const idx_t &shif, string_t &result); DUCKDB_API static void LeftShift(const string_t &bit_string, const idx_t &shift, string_t &result); DUCKDB_API static void BitwiseAnd(const string_t &rhs, const string_t &lhs, string_t &result); DUCKDB_API static void BitwiseOr(const string_t &rhs, const string_t &lhs, string_t &result); DUCKDB_API static void BitwiseXor(const string_t &rhs, const string_t &lhs, string_t &result); DUCKDB_API static void BitwiseNot(const string_t &rhs, string_t &result); DUCKDB_API static void Verify(const string_t &input); private: static void Finalize(string_t &str); static idx_t GetBitInternal(string_t bit_string, idx_t n); static void SetBitInternal(string_t &bit_string, idx_t n, idx_t new_value); static idx_t GetBitIndex(idx_t n); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/sel_cache.hpp 
// // //===----------------------------------------------------------------------===// namespace duckdb { //! Selection vector cache used for caching vector slices struct SelCache { unordered_map> cache; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/vector_cache.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Allocator; class Vector; //! The VectorCache holds cached data that allows for re-use of the same memory by vectors class VectorCache { public: //! Instantiate a vector cache with the given type and capacity DUCKDB_API explicit VectorCache(Allocator &allocator, const LogicalType &type, idx_t capacity = STANDARD_VECTOR_SIZE); buffer_ptr buffer; public: void ResetFromCache(Vector &result) const; const LogicalType &GetType() const; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/arrow/result_arrow_wrapper.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class ResultArrowArrayStreamWrapper { public: explicit ResultArrowArrayStreamWrapper(unique_ptr result, idx_t batch_size); ArrowArrayStream stream; unique_ptr result; PreservedError last_error; idx_t batch_size; vector column_types; vector column_names; private: static int MyStreamGetSchema(struct ArrowArrayStream *stream, struct ArrowSchema *out); static int MyStreamGetNext(struct ArrowArrayStream *stream, struct ArrowArray *out); static void MyStreamRelease(struct ArrowArrayStream *stream); static const char *MyStreamGetLastError(struct ArrowArrayStream *stream); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/bind_helpers.hpp // // //===----------------------------------------------------------------------===// 
namespace duckdb {

class Value;

// NOTE(review): the element types of the `vector` parameters and return values below are missing in this copy of
// the file; restore them from the upstream header before compiling.

//! Converts the given vector (taken by value) into a single Value
Value ConvertVectorToValue(vector set);
//! Parses a column list supplied as a vector.
//! NOTE(review): `names` is presumably the list of known column names and `option_name` the name of the option
//! being parsed - confirm against the implementation.
vector ParseColumnList(const vector &set, vector &names, const string &option_name);
//! Overload of ParseColumnList that receives the column list as a Value
vector ParseColumnList(const Value &value, vector &names, const string &option_name);
//! Ordered variant of ParseColumnList.
//! NOTE(review): `loption` is presumably the (lower-cased?) option name - confirm against the implementation.
vector ParseColumnsOrdered(const vector &set, vector &names, const string &loption);
//! Overload of ParseColumnsOrdered that receives the column list as a Value
vector ParseColumnsOrdered(const Value &value, vector &names, const string &loption);

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/common/box_renderer.hpp
//
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/main/query_profiler.hpp
//
//
//===----------------------------------------------------------------------===//

#include // NOTE(review): the header name is missing in this copy of the file

namespace duckdb {

class ClientContext;
class ExpressionExecutor;
class PhysicalOperator;
class SQLStatement;

//! The ExpressionInfo keeps information related to an expression
struct ExpressionInfo {
	//! Default-constructs the info with `hasfunction` set to false
	explicit ExpressionInfo() : hasfunction(false) {
	}

	//! The child expression infos (NOTE(review): element type missing in this copy of the file)
	vector> children;
	//! Extract expression information from a given expression state
	void ExtractExpressionsRecursive(unique_ptr &state);

	//! Whether or not the expression has a function
	bool hasfunction;
	//! The function name
	string function_name;
	//! The function time
	uint64_t function_time = 0;
	//! The number of ALL tuples
	uint64_t tuples_count = 0;
	//! The number of tuples sampled
	uint64_t sample_tuples_count = 0;
};

//! The ExpressionRootInfo keeps information related to the root of an expression tree
struct ExpressionRootInfo {
	ExpressionRootInfo(ExpressionExecutorState &executor, string name);

	//! The number of times the executor was called
	uint64_t total_count = 0;
	//! The number of times the executor was called since the last sampling
	uint64_t current_count = 0;
	//! The number of samples
	uint64_t sample_count = 0;
	//! The number of tuples in all samples
	uint64_t sample_tuples_count = 0;
	//! The number of tuples processed by this executor
	uint64_t tuples_count = 0;
	//! Owning pointer to the root of the expression tree
	//! (NOTE(review): declared as a single unique_ptr, not a vector; its element type is missing in this copy)
	unique_ptr root;
	//! Name
	string name;
	//! Elapsed time (NOTE(review): no in-class initializer; presumably set by the constructor - confirm)
	double time;
	//! Extra Info
	string extra_info;
};

//! Collects the ExpressionRootInfo entries that belong to one expression executor
struct ExpressionExecutorInfo {
	//! NOTE(review): this default constructor leaves `id` without a value (it has no in-class initializer)
	explicit ExpressionExecutorInfo() {};
	explicit ExpressionExecutorInfo(ExpressionExecutor &executor, const string &name, int id);

	//! Pointers to all ExpressionRootInfo entries (NOTE(review): element type missing in this copy of the file)
	vector> roots;
	//! Id; used as the index into the executors_info vector
	int id;
};

//! Timing and cardinality recorded for one operator
struct OperatorInformation {
	//! Both arguments default to 0 and are copied into the members of the same name
	explicit OperatorInformation(double time_ = 0, idx_t elements_ = 0) : time(time_), elements(elements_) {
	}

	//! Recorded time
	double time = 0;
	//! Recorded number of elements
	idx_t elements = 0;
	//! Name
	string name;
	//! A vector of Expression Executor Info (NOTE(review): element type missing in this copy of the file)
	vector> executors_info;
};

//! The OperatorProfiler measures timings of individual operators
class OperatorProfiler {
	friend class QueryProfiler;

public:
	DUCKDB_API explicit OperatorProfiler(bool enabled);

	//! Marks the start of an operator (the operator is passed as an optional pointer)
	DUCKDB_API void StartOperator(optional_ptr phys_op);
	//! Marks the end of the operator started with StartOperator; `chunk` is optional
	DUCKDB_API void EndOperator(optional_ptr chunk);
	DUCKDB_API void Flush(const PhysicalOperator &phys_op, ExpressionExecutor &expression_executor, const string &name,
	                      int id);

	~OperatorProfiler() {
	}

private:
	void AddTiming(const PhysicalOperator &op, double time, idx_t elements);

	//! Whether or not the profiler is enabled
	bool enabled;
	//! The timer used to time the execution time of the individual Physical Operators
	Profiler op;
	//! The Physical Operator that is currently active
	//! (NOTE(review): the original comment called this a "stack", but it is declared as a single optional_ptr)
	optional_ptr active_operator;
	//! A mapping of physical operators to recorded timings
	reference_map_t timings;
};
//! The QueryProfiler can be used to measure timings of queries
class QueryProfiler {
public:
	DUCKDB_API QueryProfiler(ClientContext &context);

public:
	//! A node of the query tree (see CreateTree / TreeMap)
	struct TreeNode {
		PhysicalOperatorType type;
		string name;
		string extra_info;
		OperatorInformation info;
		//! Child nodes (NOTE(review): element type missing in this copy of the file)
		vector> children;
		idx_t depth = 0;
	};

	// Propagate save_location, enabled, detailed_enabled and automatic_print_format.
	void Propagate(QueryProfiler &qp);

	//! NOTE(review): the template arguments of reference_map_t are missing in this copy of the file
	using TreeMap = reference_map_t>;

private:
	//! Builds the tree for `root`; `depth` defaults to 0
	unique_ptr CreateTree(const PhysicalOperator &root, idx_t depth = 0);
	//! Renders `node` to the given stream
	void Render(const TreeNode &node, std::ostream &str) const;

public:
	DUCKDB_API bool IsEnabled() const;
	DUCKDB_API bool IsDetailedEnabled() const;
	DUCKDB_API ProfilerPrintFormat GetPrintFormat() const;
	DUCKDB_API bool PrintOptimizerOutput() const;
	DUCKDB_API string GetSaveLocation() const;

	DUCKDB_API static QueryProfiler &Get(ClientContext &context);

	//! Starts profiling of `query`; both flags default to false
	DUCKDB_API void StartQuery(string query, bool is_explain_analyze = false, bool start_at_optimizer = false);
	DUCKDB_API void EndQuery();

	DUCKDB_API void StartExplainAnalyze();

	//! Adds the timings gathered by an OperatorProfiler to this query profiler
	DUCKDB_API void Flush(OperatorProfiler &profiler);

	DUCKDB_API void StartPhase(string phase);
	DUCKDB_API void EndPhase();

	DUCKDB_API void Initialize(const PhysicalOperator &root);

	DUCKDB_API string QueryTreeToString() const;
	DUCKDB_API void QueryTreeToStream(std::ostream &str) const;
	DUCKDB_API void Print();

	//! Returns the profiling output as a string. The return value is formatted based on the current print format
	//! (see GetPrintFormat()).
	//! NOTE(review): the original comment contrasted this with "ToString, which is always formatted as a string";
	//! presumably QueryTreeToString was meant - confirm.
	DUCKDB_API string ToString() const;
	DUCKDB_API string ToJSON() const;
	DUCKDB_API void WriteToFile(const char *path, string &info) const;

	//! Returns the number of entries in the tree map
	idx_t OperatorSize() {
		return tree_map.size();
	}

	void Finalize(TreeNode &node);

private:
	ClientContext &context;

	//! Whether or not the query profiler is running
	bool running;
	//! The lock used for flushing information from a thread into the global query profiler
	mutex flush_lock;

	//! Whether or not the query requires profiling
	bool query_requires_profiling;

	//! The root of the query tree
	unique_ptr root;
	//! The query string
	string query;
	//! The timer used to time the execution time of the entire query
	Profiler main_query;
	//! A map of a Physical Operator pointer to a tree node
	TreeMap tree_map;
	//! Whether or not we are running as part of an explain_analyze query
	bool is_explain_analyze;

public:
	//! Read-only access to the tree map
	const TreeMap &GetTreeMap() const {
		return tree_map;
	}

private:
	//! The timer used to time the individual phases of the planning process
	Profiler phase_profiler;
	//! A mapping of the phase names to the timings
	//! (NOTE(review): the template arguments of unordered_map are missing in this copy of the file)
	using PhaseTimingStorage = unordered_map;
	PhaseTimingStorage phase_timings;
	using PhaseTimingItem = PhaseTimingStorage::value_type;
	//! The stack of currently active phases
	vector phase_stack;

private:
	vector GetOrderedPhaseTimings() const;

	//! Check whether or not an operator type requires query profiling. If none of the ops in a query require profiling
	//! no profiling information is output.
	bool OperatorRequiresProfiling(PhysicalOperatorType op_type);
};

//! The QueryProfilerHistory can be used to access the profiler of previous queries
class QueryProfilerHistory {
private:
	//! Default value of prev_profilers_size
	static constexpr uint64_t DEFAULT_SIZE = 20;

	//! Previous Query profilers (NOTE(review): the deque's element type is missing in this copy of the file)
	deque>> prev_profilers;
	//! Previous Query profilers size
	uint64_t prev_profilers_size = DEFAULT_SIZE;

public:
	//! Mutable access to the stored profilers
	deque>> &GetPrevProfilers() {
		return prev_profilers;
	}
	QueryProfilerHistory() {
	}

	//! Sets prev_profilers_size; only the stored size value is assigned here, the deque is not touched
	void SetPrevProfilersSize(uint64_t prevProfilersSize) {
		prev_profilers_size = prevProfilersSize;
	}
	uint64_t GetPrevProfilersSize() const {
		return prev_profilers_size;
	}

public:
	//! Assigns the same field as SetPrevProfilersSize
	void SetProfilerHistorySize(uint64_t size) {
		this->prev_profilers_size = size;
	}
	//! Restores prev_profilers_size to DEFAULT_SIZE
	void ResetProfilerHistorySize() {
		this->prev_profilers_size = DEFAULT_SIZE;
	}
};
} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/common/list.hpp
//
//
//===----------------------------------------------------------------------===//

#include // NOTE(review): the header name is missing in this copy of the file

namespace duckdb {
using std::list;
}

namespace duckdb {
class ColumnDataCollection;
class ColumnDataRowCollection;

//! Horizontal alignment of a rendered value
enum class ValueRenderAlignment { LEFT, MIDDLE, RIGHT };
//! Whether results are rendered row-wise or column-wise (see BoxRendererConfig::render_mode)
enum class RenderMode { ROWS, COLUMNS };

//! Settings and drawing characters used by the BoxRenderer
struct BoxRendererConfig {
	// a max_width of 0 means we default to the terminal width
	idx_t max_width = 0;
	// the maximum amount of rows to render
	idx_t max_rows = 20;
	// the limit that is applied prior to rendering
	// if we are rendering exactly "limit" rows then a question mark is rendered instead
	idx_t limit = 0;
	// the max col width determines the maximum size of a single column
	// note that the max col width is only used if the result does not fit on the screen
	idx_t max_col_width = 20;
	//! how to render NULL values
	string null_value = "NULL";
	//! Whether or not to render row-wise or column-wise
	RenderMode render_mode = RenderMode::ROWS;

	// Drawing characters: UTF-8 box-drawing glyphs by default, plain ASCII when DUCKDB_ASCII_TREE_RENDERER is defined.
	// DOTDOTDOT_LENGTH is 1 for the single-glyph ellipsis and 3 for the ASCII "...".
#ifndef DUCKDB_ASCII_TREE_RENDERER
	const char *LTCORNER = "\342\224\214"; // "┌";
	const char *RTCORNER = "\342\224\220"; // "┐";
	const char *LDCORNER = "\342\224\224"; // "└";
	const char *RDCORNER = "\342\224\230"; // "┘";

	const char *MIDDLE = "\342\224\274";  // "┼";
	const char *TMIDDLE = "\342\224\254"; // "┬";
	const char *LMIDDLE = "\342\224\234"; // "├";
	const char *RMIDDLE = "\342\224\244"; // "┤";
	const char *DMIDDLE = "\342\224\264"; // "┴";

	const char *VERTICAL = "\342\224\202";   // "│";
	const char *HORIZONTAL = "\342\224\200"; // "─";

	const char *DOTDOTDOT = "\xE2\x80\xA6"; // "…";
	const char *DOT = "\xC2\xB7";           // "·";
	const idx_t DOTDOTDOT_LENGTH = 1;

#else
	// ASCII version
	const char *LTCORNER = "<";
	const char *RTCORNER = ">";
	const char *LDCORNER = "<";
	const char *RDCORNER = ">";

	const char *MIDDLE = "+";
	const char *TMIDDLE = "+";
	const char *LMIDDLE = "+";
	const char *RMIDDLE = "+";
	const char *DMIDDLE = "+";

	const char *VERTICAL = "|";
	const char *HORIZONTAL = "-";

	const char *DOTDOTDOT = "..."; // "...";
	const char *DOT = ".";         // ".";
	const idx_t DOTDOTDOT_LENGTH = 3;
#endif
};

//! Renders a ColumnDataCollection as a text box, driven by a BoxRendererConfig.
//! NOTE(review): the element types of the `vector` / `list` parameters and return values are missing in this copy
//! of the file; restore them from the upstream header before compiling.
class BoxRenderer {
	static const idx_t SPLIT_COLUMN;

public:
	//! `config_p` defaults to a default-constructed BoxRendererConfig
	explicit BoxRenderer(BoxRendererConfig config_p = BoxRendererConfig());

	//! Returns the rendering as a string
	string ToString(ClientContext &context, const vector &names, const ColumnDataCollection &op);

	//! Writes the rendering to the stream `ss`
	void Render(ClientContext &context, const vector &names, const ColumnDataCollection &op, std::ostream &ss);
	void Print(ClientContext &context, const vector &names, const ColumnDataCollection &op);

private:
	//! The configuration used for rendering
	BoxRendererConfig config;

private:
	//! `alignment` defaults to ValueRenderAlignment::MIDDLE
	void RenderValue(std::ostream &ss, const string &value, idx_t column_width,
	                 ValueRenderAlignment alignment = ValueRenderAlignment::MIDDLE);
	string RenderType(const LogicalType &type);
	ValueRenderAlignment TypeAlignment(const LogicalType &type);
	string GetRenderValue(ColumnDataRowCollection &rows, idx_t c, idx_t r);

	list FetchRenderCollections(ClientContext &context, const ColumnDataCollection &result, idx_t top_rows,
	                            idx_t bottom_rows);
	list PivotCollections(ClientContext &context, list input, vector &column_names, vector &result_types,
	                      idx_t row_count);
	vector ComputeRenderWidths(const vector &names, const vector &result_types, list &collections, idx_t min_width,
	                           idx_t max_width, vector &column_map, idx_t &total_length);
	void RenderHeader(const vector &names, const vector &result_types, const vector &column_map,
	                  const vector &widths, const vector &boundaries, idx_t total_length, bool has_results,
	                  std::ostream &ss);
	void RenderValues(const list &collections, const vector &column_map, const vector &widths,
	                  const vector &result_types, std::ostream &ss);
	void RenderRowCount(string row_count_str, string shown_str, const string &column_count_str,
	                    const vector &boundaries, bool has_hidden_rows, bool has_hidden_columns, idx_t total_length,
	                    idx_t row_count, idx_t column_count, idx_t minimum_row_length, std::ostream &ss);
};

} // namespace duckdb

// LICENSE_CHANGE_BEGIN
// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2
// See the end of this file for a list

// NOTE(review): the header names of the four includes below are missing in this copy of the file
#include
#include
#include
#include

namespace duckdb {

//! Result of Utf8Proc::Analyze
enum class UnicodeType { INVALID, ASCII, UNICODE };
//! Reason optionally reported by Utf8Proc::Analyze through `invalid_reason`
enum class UnicodeInvalidReason { BYTE_MISMATCH, INVALID_UNICODE };

class Utf8Proc {
public:
	//! Distinguishes ASCII, Valid UTF8 and Invalid UTF8 strings.
	//! `invalid_reason` and `invalid_pos` are optional output pointers (both default to nullptr).
	static UnicodeType Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason = nullptr,
	                           size_t *invalid_pos = nullptr);
	//! Performs UTF NFC normalization of string, return value needs to be free'd
	static char* Normalize(const char* s, size_t len);
	//! Returns whether or not the UTF8 string is valid
	static bool IsValid(const char *s, size_t len);
	//! Returns the position (in bytes) of the next grapheme cluster
	static size_t NextGraphemeCluster(const char *s, size_t len, size_t pos);
	//! Returns the position (in bytes) of the previous grapheme cluster
	static size_t PreviousGraphemeCluster(const char *s, size_t len, size_t pos);

	//! Transform a codepoint to utf8 and writes it to "c", sets "sz" to the size of the codepoint
	static bool CodepointToUtf8(int cp, int &sz, char *c);
	//! Returns the codepoint length in bytes when encoded in UTF8
	static int CodepointLength(int cp);
	//! Transform a UTF8 string to a codepoint; returns the codepoint and writes the length of the codepoint (in UTF8)
	//! to sz
	static int32_t UTF8ToCodepoint(const char *c, int &sz);
	//! Returns the render width of a single character in a string
	static size_t RenderWidth(const char *s, size_t len, size_t pos);
	//! Overload taking a std::string (NOTE(review): presumably the render width of the whole string - confirm)
	static size_t RenderWidth(const std::string &str);
};

}

// LICENSE_CHANGE_END

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/common/checksum.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
//! Compute a checksum over a buffer of size `size`
uint64_t Checksum(uint8_t *buffer, size_t size);

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/common/compressed_file_system.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
class CompressedFile;

//! Buffers and cursor pointers shared between a CompressedFile and its StreamWrapper
struct StreamData {
	// various buffers & pointers
	bool write = false;
	bool refresh = false;
	// NOTE(review): the element type of the two unsafe_unique_array members is missing in this copy of the file
	unsafe_unique_array in_buff;
	unsafe_unique_array out_buff;
	// start/end cursors; all default to nullptr
	data_ptr_t out_buff_start = nullptr;
	data_ptr_t out_buff_end = nullptr;
	data_ptr_t in_buff_start = nullptr;
	data_ptr_t in_buff_end = nullptr;

	// buffer sizes; both default to 0
	idx_t in_buf_size = 0;
	idx_t out_buf_size = 0;
};

//! Abstract interface of a (de)compression stream; every operation except the destructor is pure virtual
struct StreamWrapper {
	DUCKDB_API virtual ~StreamWrapper();

	DUCKDB_API virtual void Initialize(CompressedFile &file, bool write) = 0;
	DUCKDB_API virtual bool Read(StreamData &stream_data) = 0;
	DUCKDB_API virtual void Write(CompressedFile &file, StreamData &stream_data, data_ptr_t buffer,
	                              int64_t nr_bytes) = 0;
	DUCKDB_API virtual void Close() = 0;
};

//! FileSystem specialization for compressed files; concrete subclasses provide the stream and the buffer sizes
class CompressedFileSystem : public FileSystem {
public:
	DUCKDB_API int64_t Read(FileHandle &handle, void *buffer, int64_t nr_bytes) override;
	DUCKDB_API int64_t Write(FileHandle &handle, void *buffer, int64_t nr_bytes) override;

	DUCKDB_API void Reset(FileHandle &handle) override;

	DUCKDB_API int64_t GetFileSize(FileHandle &handle) override;

	DUCKDB_API bool OnDiskFile(FileHandle &handle) override;
	DUCKDB_API bool CanSeek() override;

	//! Creates the stream to use (NOTE(review): the unique_ptr element type is missing in this copy of the file)
	DUCKDB_API virtual unique_ptr CreateStream() = 0;
	DUCKDB_API virtual idx_t InBufferSize() = 0;
	DUCKDB_API virtual idx_t OutBufferSize() = 0;
};

//! FileHandle for a file accessed through a CompressedFileSystem, wrapping a child file handle
class CompressedFile : public FileHandle {
public:
	DUCKDB_API CompressedFile(CompressedFileSystem &fs, unique_ptr child_handle_p, const string &path);
	DUCKDB_API ~CompressedFile() override;

	//! The compressed file system this handle refers to
	CompressedFileSystem &compressed_fs;
	//! The wrapped child handle (NOTE(review): the unique_ptr element type is missing in this copy of the file)
	unique_ptr child_handle;
	//! Whether the file is opened for reading or for writing
	bool write = false;
	StreamData stream_data;

public:
	DUCKDB_API void Initialize(bool write);
	DUCKDB_API int64_t ReadData(void *buffer, int64_t nr_bytes);
	DUCKDB_API int64_t WriteData(data_ptr_t buffer, int64_t nr_bytes);
	DUCKDB_API void Close() override;

private:
	//! The stream in use (NOTE(review): presumably obtained from CompressedFileSystem::CreateStream - confirm)
	unique_ptr stream_wrapper;
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/common/crypto/md5.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! Incremental MD5 computation: feed data with Add(), then obtain the digest with Finish() or FinishHex()
class MD5Context {
public:
	//! Length of the binary digest, in bytes
	static constexpr idx_t MD5_HASH_LENGTH_BINARY = 16;
	//! Length of the hexadecimal digest, in characters
	static constexpr idx_t MD5_HASH_LENGTH_TEXT = 32;

public:
	MD5Context();

	//! Adds `len` bytes starting at `data` (forwards to MD5Update)
	void Add(const_data_ptr_t data, idx_t len) {
		MD5Update(data, len);
	}
	//! Adds a C string (defined out of line)
	void Add(const char *data);
	//! Adds the GetSize() bytes returned by GetData() of the given string_t
	void Add(string_t string) {
		MD5Update(const_data_ptr_cast(string.GetData()), string.GetSize());
	}
	//! Adds the size() bytes of the given string
	void Add(const string &data) {
		MD5Update(const_data_ptr_cast(data.c_str()), data.size());
	}

	//! Write the 16-byte (binary) digest to the specified location
	void Finish(data_ptr_t out_digest);
	//! Write the 32-character digest (in hexadecimal format) to the specified location
	void FinishHex(char *out_digest);
	//! Returns the 32-character digest (in hexadecimal format) as a string
	string FinishHex();

private:
	void MD5Update(const_data_ptr_t data, idx_t len);
	static void DigestToBase16(const_data_ptr_t digest, char *zBuf);

	// NOTE(review): presumably the running hash state, the processed-bit counter and the pending input block
	// of the MD5 algorithm - confirm against the implementation.
	uint32_t buf[4];
	uint32_t bits[2];
	unsigned char in[64];
};

} // namespace duckdb

//-------------------------------------------------------------------------
// This file is automatically generated by scripts/generate_enum_util.py
// Do not edit this file manually, your changes will be overwritten
// If you want to exclude an enum from serialization, add it to the blacklist in the script
//
// Note: The generated code will only work properly if the enum is a top level item in the duckdb namespace
// If the enum is nested in a class, or in another namespace, the generated code will not compile.
// You should move the enum to the duckdb namespace, manually write a specialization or add it to the blacklist
//-------------------------------------------------------------------------

#include // NOTE(review): the header name is missing in this copy of the file

namespace duckdb {

// NOTE(review): the template parameter lists after each `template` keyword are missing in this copy of the file.
struct EnumUtil {
	// String -> Enum
	// The generic const char * overload is deleted; the string overload forwards to it via c_str().
	template
	static T FromString(const char *value) = delete;

	template
	static T FromString(const string &value) {
		return FromString(value.c_str());
	}

	// Enum -> String
	// The generic ToChars is deleted; ToString wraps the result of ToChars in a string.
	template
	static const char *ToChars(T value) = delete;

	template
	static string ToString(T value) {
		return string(ToChars(value));
	}
};

enum class TaskExecutionMode : uint8_t;

enum class TaskExecutionResult : uint8_t;

enum class InterruptMode : uint8_t;

enum class DistinctType : uint8_t;

enum class TableFilterType : uint8_t;

enum class BindingMode : uint8_t;

enum class TableColumnType : uint8_t;

enum class AggregateType : uint8_t;

enum class AggregateOrderDependent : uint8_t;

enum class FunctionNullHandling : uint8_t;

enum class FunctionSideEffects : uint8_t;

enum class MacroType : uint8_t;

enum class ArrowVariableSizeType : uint8_t;

enum class ArrowDateTimeType : uint8_t;

enum class StrTimeSpecifier : uint8_t;

enum class SimplifiedTokenType : uint8_t;

enum
class KeywordCategory : uint8_t; enum class ResultModifierType : uint8_t; enum class ConstraintType : uint8_t; enum class ForeignKeyType : uint8_t; enum class ParserExtensionResultType : uint8_t; enum class QueryNodeType : uint8_t; enum class SequenceInfo : uint8_t; enum class AlterScalarFunctionType : uint8_t; enum class AlterTableType : uint8_t; enum class AlterViewType : uint8_t; enum class AlterTableFunctionType : uint8_t; enum class AlterType : uint8_t; enum class PragmaType : uint8_t; enum class OnCreateConflict : uint8_t; enum class TransactionType : uint8_t; enum class SampleMethod : uint8_t; enum class ExplainType : uint8_t; enum class OnConflictAction : uint8_t; enum class WindowBoundary : uint8_t; enum class DataFileType : uint8_t; enum class StatsInfo : uint8_t; enum class StatisticsType : uint8_t; enum class ColumnSegmentType : uint8_t; enum class ChunkInfoType : uint8_t; enum class BitpackingMode : uint8_t; enum class BlockState : uint8_t; enum class VerificationType : uint8_t; enum class FileLockType : uint8_t; enum class FileBufferType : uint8_t; enum class ExceptionFormatValueType : uint8_t; enum class ExtraTypeInfoType : uint8_t; enum class PhysicalType : uint8_t; enum class LogicalTypeId : uint8_t; enum class OutputStream : uint8_t; enum class TimestampCastResult : uint8_t; enum class ConflictManagerMode : uint8_t; enum class LookupResultType : uint8_t; enum class MapInvalidReason : uint8_t; enum class UnionInvalidReason : uint8_t; enum class VectorBufferType : uint8_t; enum class VectorAuxiliaryDataType : uint8_t; enum class PartitionedColumnDataType : uint8_t; enum class ColumnDataAllocatorType : uint8_t; enum class ColumnDataScanProperties : uint8_t; enum class PartitionedTupleDataType : uint8_t; enum class TupleDataPinProperties : uint8_t; enum class PartitionSortStage : uint8_t; enum class PhysicalOperatorType : uint8_t; enum class VectorType : uint8_t; enum class AccessMode : uint8_t; enum class FileGlobOptions : uint8_t; enum class WALType 
: uint8_t; enum class JoinType : uint8_t; enum class FileCompressionType : uint8_t; enum class ProfilerPrintFormat : uint8_t; enum class StatementType : uint8_t; enum class StatementReturnType : uint8_t; enum class OrderPreservationType : uint8_t; enum class DebugInitialize : uint8_t; enum class CatalogType : uint8_t; enum class SetScope : uint8_t; enum class TableScanType : uint8_t; enum class SetType : uint8_t; enum class ExpressionType : uint8_t; enum class ExpressionClass : uint8_t; enum class PendingExecutionResult : uint8_t; enum class WindowAggregationMode : uint32_t; enum class SubqueryType : uint8_t; enum class OrderType : uint8_t; enum class OrderByNullType : uint8_t; enum class DefaultOrderByNullType : uint8_t; enum class DatePartSpecifier : uint8_t; enum class OnEntryNotFound : uint8_t; enum class LogicalOperatorType : uint8_t; enum class OperatorResultType : uint8_t; enum class OperatorFinalizeResultType : uint8_t; enum class SourceResultType : uint8_t; enum class SinkResultType : uint8_t; enum class SinkFinalizeType : uint8_t; enum class JoinRefType : uint8_t; enum class UndoFlags : uint32_t; enum class SetOperationType : uint8_t; enum class OptimizerType : uint32_t; enum class CompressionType : uint8_t; enum class AggregateHandling : uint8_t; enum class TableReferenceType : uint8_t; enum class RelationType : uint8_t; enum class FilterPropagateResult : uint8_t; enum class IndexType : uint8_t; enum class ExplainOutputType : uint8_t; enum class NType : uint8_t; enum class VerifyExistenceType : uint8_t; enum class ParserMode : uint8_t; enum class ErrorType : uint16_t; enum class AppenderType : uint8_t; enum class CheckpointAbort : uint8_t; enum class ExtensionLoadResult : uint8_t; enum class QueryResultType : uint8_t; enum class CAPIResultSetType : uint8_t; template <> const char *EnumUtil::ToChars(TaskExecutionMode value); template <> const char *EnumUtil::ToChars(TaskExecutionResult value); template <> const char *EnumUtil::ToChars(InterruptMode 
value); template <> const char *EnumUtil::ToChars(DistinctType value); template <> const char *EnumUtil::ToChars(TableFilterType value); template <> const char *EnumUtil::ToChars(BindingMode value); template <> const char *EnumUtil::ToChars(TableColumnType value); template <> const char *EnumUtil::ToChars(AggregateType value); template <> const char *EnumUtil::ToChars(AggregateOrderDependent value); template <> const char *EnumUtil::ToChars(FunctionNullHandling value); template <> const char *EnumUtil::ToChars(FunctionSideEffects value); template <> const char *EnumUtil::ToChars(MacroType value); template <> const char *EnumUtil::ToChars(ArrowVariableSizeType value); template <> const char *EnumUtil::ToChars(ArrowDateTimeType value); template <> const char *EnumUtil::ToChars(StrTimeSpecifier value); template <> const char *EnumUtil::ToChars(SimplifiedTokenType value); template <> const char *EnumUtil::ToChars(KeywordCategory value); template <> const char *EnumUtil::ToChars(ResultModifierType value); template <> const char *EnumUtil::ToChars(ConstraintType value); template <> const char *EnumUtil::ToChars(ForeignKeyType value); template <> const char *EnumUtil::ToChars(ParserExtensionResultType value); template <> const char *EnumUtil::ToChars(QueryNodeType value); template <> const char *EnumUtil::ToChars(SequenceInfo value); template <> const char *EnumUtil::ToChars(AlterScalarFunctionType value); template <> const char *EnumUtil::ToChars(AlterTableType value); template <> const char *EnumUtil::ToChars(AlterViewType value); template <> const char *EnumUtil::ToChars(AlterTableFunctionType value); template <> const char *EnumUtil::ToChars(AlterType value); template <> const char *EnumUtil::ToChars(PragmaType value); template <> const char *EnumUtil::ToChars(OnCreateConflict value); template <> const char *EnumUtil::ToChars(TransactionType value); template <> const char *EnumUtil::ToChars(SampleMethod value); template <> const char *EnumUtil::ToChars(ExplainType 
value); template <> const char *EnumUtil::ToChars(OnConflictAction value); template <> const char *EnumUtil::ToChars(WindowBoundary value); template <> const char *EnumUtil::ToChars(DataFileType value); template <> const char *EnumUtil::ToChars(StatsInfo value); template <> const char *EnumUtil::ToChars(StatisticsType value); template <> const char *EnumUtil::ToChars(ColumnSegmentType value); template <> const char *EnumUtil::ToChars(ChunkInfoType value); template <> const char *EnumUtil::ToChars(BitpackingMode value); template <> const char *EnumUtil::ToChars(BlockState value); template <> const char *EnumUtil::ToChars(VerificationType value); template <> const char *EnumUtil::ToChars(FileLockType value); template <> const char *EnumUtil::ToChars(FileBufferType value); template <> const char *EnumUtil::ToChars(ExceptionFormatValueType value); template <> const char *EnumUtil::ToChars(ExtraTypeInfoType value); template <> const char *EnumUtil::ToChars(PhysicalType value); template <> const char *EnumUtil::ToChars(LogicalTypeId value); template <> const char *EnumUtil::ToChars(OutputStream value); template <> const char *EnumUtil::ToChars(TimestampCastResult value); template <> const char *EnumUtil::ToChars(ConflictManagerMode value); template <> const char *EnumUtil::ToChars(LookupResultType value); template <> const char *EnumUtil::ToChars(MapInvalidReason value); template <> const char *EnumUtil::ToChars(UnionInvalidReason value); template <> const char *EnumUtil::ToChars(VectorBufferType value); template <> const char *EnumUtil::ToChars(VectorAuxiliaryDataType value); template <> const char *EnumUtil::ToChars(PartitionedColumnDataType value); template <> const char *EnumUtil::ToChars(ColumnDataAllocatorType value); template <> const char *EnumUtil::ToChars(ColumnDataScanProperties value); template <> const char *EnumUtil::ToChars(PartitionedTupleDataType value); template <> const char *EnumUtil::ToChars(TupleDataPinProperties value); template <> const char 
*EnumUtil::ToChars(PartitionSortStage value); template <> const char *EnumUtil::ToChars(PhysicalOperatorType value); template <> const char *EnumUtil::ToChars(VectorType value); template <> const char *EnumUtil::ToChars(AccessMode value); template <> const char *EnumUtil::ToChars(FileGlobOptions value); template <> const char *EnumUtil::ToChars(WALType value); template <> const char *EnumUtil::ToChars(JoinType value); template <> const char *EnumUtil::ToChars(FileCompressionType value); template <> const char *EnumUtil::ToChars(ProfilerPrintFormat value); template <> const char *EnumUtil::ToChars(StatementType value); template <> const char *EnumUtil::ToChars(StatementReturnType value); template <> const char *EnumUtil::ToChars(OrderPreservationType value); template <> const char *EnumUtil::ToChars(DebugInitialize value); template <> const char *EnumUtil::ToChars(CatalogType value); template <> const char *EnumUtil::ToChars(SetScope value); template <> const char *EnumUtil::ToChars(TableScanType value); template <> const char *EnumUtil::ToChars(SetType value); template <> const char *EnumUtil::ToChars(ExpressionType value); template <> const char *EnumUtil::ToChars(ExpressionClass value); template <> const char *EnumUtil::ToChars(PendingExecutionResult value); template <> const char *EnumUtil::ToChars(WindowAggregationMode value); template <> const char *EnumUtil::ToChars(SubqueryType value); template <> const char *EnumUtil::ToChars(OrderType value); template <> const char *EnumUtil::ToChars(OrderByNullType value); template <> const char *EnumUtil::ToChars(DefaultOrderByNullType value); template <> const char *EnumUtil::ToChars(DatePartSpecifier value); template <> const char *EnumUtil::ToChars(OnEntryNotFound value); template <> const char *EnumUtil::ToChars(LogicalOperatorType value); template <> const char *EnumUtil::ToChars(OperatorResultType value); template <> const char *EnumUtil::ToChars(OperatorFinalizeResultType value); template <> const char 
*EnumUtil::ToChars(SourceResultType value); template <> const char *EnumUtil::ToChars(SinkResultType value); template <> const char *EnumUtil::ToChars(SinkFinalizeType value); template <> const char *EnumUtil::ToChars(JoinRefType value); template <> const char *EnumUtil::ToChars(UndoFlags value); template <> const char *EnumUtil::ToChars(SetOperationType value); template <> const char *EnumUtil::ToChars(OptimizerType value); template <> const char *EnumUtil::ToChars(CompressionType value); template <> const char *EnumUtil::ToChars(AggregateHandling value); template <> const char *EnumUtil::ToChars(TableReferenceType value); template <> const char *EnumUtil::ToChars(RelationType value); template <> const char *EnumUtil::ToChars(FilterPropagateResult value); template <> const char *EnumUtil::ToChars(IndexType value); template <> const char *EnumUtil::ToChars(ExplainOutputType value); template <> const char *EnumUtil::ToChars(NType value); template <> const char *EnumUtil::ToChars(VerifyExistenceType value); template <> const char *EnumUtil::ToChars(ParserMode value); template <> const char *EnumUtil::ToChars(ErrorType value); template <> const char *EnumUtil::ToChars(AppenderType value); template <> const char *EnumUtil::ToChars(CheckpointAbort value); template <> const char *EnumUtil::ToChars(ExtensionLoadResult value); template <> const char *EnumUtil::ToChars(QueryResultType value); template <> const char *EnumUtil::ToChars(CAPIResultSetType value); template <> TaskExecutionMode EnumUtil::FromString(const char *value); template <> TaskExecutionResult EnumUtil::FromString(const char *value); template <> InterruptMode EnumUtil::FromString(const char *value); template <> DistinctType EnumUtil::FromString(const char *value); template <> TableFilterType EnumUtil::FromString(const char *value); template <> BindingMode EnumUtil::FromString(const char *value); template <> TableColumnType EnumUtil::FromString(const char *value); template <> AggregateType 
EnumUtil::FromString(const char *value); template <> AggregateOrderDependent EnumUtil::FromString(const char *value); template <> FunctionNullHandling EnumUtil::FromString(const char *value); template <> FunctionSideEffects EnumUtil::FromString(const char *value); template <> MacroType EnumUtil::FromString(const char *value); template <> ArrowVariableSizeType EnumUtil::FromString(const char *value); template <> ArrowDateTimeType EnumUtil::FromString(const char *value); template <> StrTimeSpecifier EnumUtil::FromString(const char *value); template <> SimplifiedTokenType EnumUtil::FromString(const char *value); template <> KeywordCategory EnumUtil::FromString(const char *value); template <> ResultModifierType EnumUtil::FromString(const char *value); template <> ConstraintType EnumUtil::FromString(const char *value); template <> ForeignKeyType EnumUtil::FromString(const char *value); template <> ParserExtensionResultType EnumUtil::FromString(const char *value); template <> QueryNodeType EnumUtil::FromString(const char *value); template <> SequenceInfo EnumUtil::FromString(const char *value); template <> AlterScalarFunctionType EnumUtil::FromString(const char *value); template <> AlterTableType EnumUtil::FromString(const char *value); template <> AlterViewType EnumUtil::FromString(const char *value); template <> AlterTableFunctionType EnumUtil::FromString(const char *value); template <> AlterType EnumUtil::FromString(const char *value); template <> PragmaType EnumUtil::FromString(const char *value); template <> OnCreateConflict EnumUtil::FromString(const char *value); template <> TransactionType EnumUtil::FromString(const char *value); template <> SampleMethod EnumUtil::FromString(const char *value); template <> ExplainType EnumUtil::FromString(const char *value); template <> OnConflictAction EnumUtil::FromString(const char *value); template <> WindowBoundary EnumUtil::FromString(const char *value); template <> DataFileType EnumUtil::FromString(const char *value); 
// Explicit specializations of EnumUtil::FromString, declared here without bodies.
// Each overload takes a C string and returns the enum type named as its return type;
// presumably the string is the enumerator's textual name (definitions are not visible here).
template <> StatsInfo EnumUtil::FromString(const char *value);
template <> StatisticsType EnumUtil::FromString(const char *value);
template <> ColumnSegmentType EnumUtil::FromString(const char *value);
template <> ChunkInfoType EnumUtil::FromString(const char *value);
template <> BitpackingMode EnumUtil::FromString(const char *value);
template <> BlockState EnumUtil::FromString(const char *value);
template <> VerificationType EnumUtil::FromString(const char *value);
template <> FileLockType EnumUtil::FromString(const char *value);
template <> FileBufferType EnumUtil::FromString(const char *value);
template <> ExceptionFormatValueType EnumUtil::FromString(const char *value);
template <> ExtraTypeInfoType EnumUtil::FromString(const char *value);
template <> PhysicalType EnumUtil::FromString(const char *value);
template <> LogicalTypeId EnumUtil::FromString(const char *value);
template <> OutputStream EnumUtil::FromString(const char *value);
template <> TimestampCastResult EnumUtil::FromString(const char *value);
template <> ConflictManagerMode EnumUtil::FromString(const char *value);
template <> LookupResultType EnumUtil::FromString(const char *value);
template <> MapInvalidReason EnumUtil::FromString(const char *value);
template <> UnionInvalidReason EnumUtil::FromString(const char *value);
template <> VectorBufferType EnumUtil::FromString(const char *value);
template <> VectorAuxiliaryDataType EnumUtil::FromString(const char *value);
template <> PartitionedColumnDataType EnumUtil::FromString(const char *value);
template <> ColumnDataAllocatorType EnumUtil::FromString(const char *value);
template <> ColumnDataScanProperties EnumUtil::FromString(const char *value);
template <> PartitionedTupleDataType EnumUtil::FromString(const char *value);
template <> TupleDataPinProperties EnumUtil::FromString(const char *value);
template <> PartitionSortStage EnumUtil::FromString(const char *value);
template <> PhysicalOperatorType EnumUtil::FromString(const char *value);
template <> VectorType EnumUtil::FromString(const char *value); template <> AccessMode EnumUtil::FromString(const char *value); template <> FileGlobOptions EnumUtil::FromString(const char *value); template <> WALType EnumUtil::FromString(const char *value); template <> JoinType EnumUtil::FromString(const char *value); template <> FileCompressionType EnumUtil::FromString(const char *value); template <> ProfilerPrintFormat EnumUtil::FromString(const char *value); template <> StatementType EnumUtil::FromString(const char *value); template <> StatementReturnType EnumUtil::FromString(const char *value); template <> OrderPreservationType EnumUtil::FromString(const char *value); template <> DebugInitialize EnumUtil::FromString(const char *value); template <> CatalogType EnumUtil::FromString(const char *value); template <> SetScope EnumUtil::FromString(const char *value); template <> TableScanType EnumUtil::FromString(const char *value); template <> SetType EnumUtil::FromString(const char *value); template <> ExpressionType EnumUtil::FromString(const char *value); template <> ExpressionClass EnumUtil::FromString(const char *value); template <> PendingExecutionResult EnumUtil::FromString(const char *value); template <> WindowAggregationMode EnumUtil::FromString(const char *value); template <> SubqueryType EnumUtil::FromString(const char *value); template <> OrderType EnumUtil::FromString(const char *value); template <> OrderByNullType EnumUtil::FromString(const char *value); template <> DefaultOrderByNullType EnumUtil::FromString(const char *value); template <> DatePartSpecifier EnumUtil::FromString(const char *value); template <> OnEntryNotFound EnumUtil::FromString(const char *value); template <> LogicalOperatorType EnumUtil::FromString(const char *value); template <> OperatorResultType EnumUtil::FromString(const char *value); template <> OperatorFinalizeResultType EnumUtil::FromString(const char *value); template <> SourceResultType EnumUtil::FromString(const char 
*value); template <> SinkResultType EnumUtil::FromString(const char *value); template <> SinkFinalizeType EnumUtil::FromString(const char *value); template <> JoinRefType EnumUtil::FromString(const char *value); template <> UndoFlags EnumUtil::FromString(const char *value); template <> SetOperationType EnumUtil::FromString(const char *value); template <> OptimizerType EnumUtil::FromString(const char *value); template <> CompressionType EnumUtil::FromString(const char *value); template <> AggregateHandling EnumUtil::FromString(const char *value); template <> TableReferenceType EnumUtil::FromString(const char *value); template <> RelationType EnumUtil::FromString(const char *value); template <> FilterPropagateResult EnumUtil::FromString(const char *value); template <> IndexType EnumUtil::FromString(const char *value); template <> ExplainOutputType EnumUtil::FromString(const char *value); template <> NType EnumUtil::FromString(const char *value); template <> VerifyExistenceType EnumUtil::FromString(const char *value); template <> ParserMode EnumUtil::FromString(const char *value); template <> ErrorType EnumUtil::FromString(const char *value); template <> AppenderType EnumUtil::FromString(const char *value); template <> CheckpointAbort EnumUtil::FromString(const char *value); template <> ExtensionLoadResult EnumUtil::FromString(const char *value); template <> QueryResultType EnumUtil::FromString(const char *value); template <> CAPIResultSetType EnumUtil::FromString(const char *value); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // src/include/duckdb/parallel/interrupt.hpp // // //===----------------------------------------------------------------------===// #include #include namespace duckdb { //! InterruptMode specifies how operators should block/unblock, note that this will happen transparently to the //! 
operator, as the operator only needs to return a BLOCKED result and call the callback using the InterruptState. //! NO_INTERRUPTS: No blocking mode is specified, an error will be thrown when the operator blocks. Should only be used //! when manually calling operators of which is known they will never block. //! TASK: A weak pointer to a task is provided. On the callback, this task will be signalled. If the Task has //! been deleted, this callback becomes a NOP. This is the preferred way to await blocked pipelines. //! BLOCKING: The caller has blocked awaiting some synchronization primitive to wait for the callback. enum class InterruptMode : uint8_t { NO_INTERRUPTS, TASK, BLOCKING }; //! Synchronization primitive used to await a callback in InterruptMode::BLOCKING. struct InterruptDoneSignalState { //! Called by the callback to signal the interrupt is over void Signal(); //! Await the callback signalling the interrupt is over void Await(); protected: mutex lock; std::condition_variable cv; bool done = false; }; //! State required to make the callback after some asynchronous operation within an operator source / sink. class InterruptState { public: //! Default interrupt state will be set to InterruptMode::NO_INTERRUPTS and throw an error on use of Callback() InterruptState(); //! Register the task to be interrupted and set mode to InterruptMode::TASK, the preferred way to handle interrupts InterruptState(weak_ptr task); //! Register signal state and set mode to InterruptMode::BLOCKING, used for code paths without Task. InterruptState(weak_ptr done_signal); //! Perform the callback to indicate the Interrupt is over DUCKDB_API void Callback() const; protected: //! Current interrupt mode InterruptMode mode; //! Task ptr for InterruptMode::TASK weak_ptr current_task; //! 
Signal state for InterruptMode::BLOCKING weak_ptr signal_state; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/scalar/strftime.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { enum class StrTimeSpecifier : uint8_t { ABBREVIATED_WEEKDAY_NAME = 0, // %a - Abbreviated weekday name. (Sun, Mon, ...) FULL_WEEKDAY_NAME = 1, // %A Full weekday name. (Sunday, Monday, ...) WEEKDAY_DECIMAL = 2, // %w - Weekday as a decimal number. (0, 1, ..., 6) DAY_OF_MONTH_PADDED = 3, // %d - Day of the month as a zero-padded decimal. (01, 02, ..., 31) DAY_OF_MONTH = 4, // %-d - Day of the month as a decimal number. (1, 2, ..., 30) ABBREVIATED_MONTH_NAME = 5, // %b - Abbreviated month name. (Jan, Feb, ..., Dec) FULL_MONTH_NAME = 6, // %B - Full month name. (January, February, ...) MONTH_DECIMAL_PADDED = 7, // %m - Month as a zero-padded decimal number. (01, 02, ..., 12) MONTH_DECIMAL = 8, // %-m - Month as a decimal number. (1, 2, ..., 12) YEAR_WITHOUT_CENTURY_PADDED = 9, // %y - Year without century as a zero-padded decimal number. (00, 01, ..., 99) YEAR_WITHOUT_CENTURY = 10, // %-y - Year without century as a decimal number. (0, 1, ..., 99) YEAR_DECIMAL = 11, // %Y - Year with century as a decimal number. (2013, 2019 etc.) HOUR_24_PADDED = 12, // %H - Hour (24-hour clock) as a zero-padded decimal number. (00, 01, ..., 23) HOUR_24_DECIMAL = 13, // %-H - Hour (24-hour clock) as a decimal number. (0, 1, ..., 23) HOUR_12_PADDED = 14, // %I - Hour (12-hour clock) as a zero-padded decimal number. (01, 02, ..., 12) HOUR_12_DECIMAL = 15, // %-I - Hour (12-hour clock) as a decimal number. (1, 2, ... 12) AM_PM = 16, // %p - Locale’s AM or PM. (AM, PM) MINUTE_PADDED = 17, // %M - Minute as a zero-padded decimal number. (00, 01, ..., 59) MINUTE_DECIMAL = 18, // %-M - Minute as a decimal number. 
(0, 1, ..., 59) SECOND_PADDED = 19, // %S - Second as a zero-padded decimal number. (00, 01, ..., 59) SECOND_DECIMAL = 20, // %-S - Second as a decimal number. (0, 1, ..., 59) MICROSECOND_PADDED = 21, // %f - Microsecond as a decimal number, zero-padded on the left. (000000 - 999999) MILLISECOND_PADDED = 22, // %g - Millisecond as a decimal number, zero-padded on the left. (000 - 999) UTC_OFFSET = 23, // %z - UTC offset in the form +HHMM or -HHMM. ( ) TZ_NAME = 24, // %Z - Time zone name. ( ) DAY_OF_YEAR_PADDED = 25, // %j - Day of the year as a zero-padded decimal number. (001, 002, ..., 366) DAY_OF_YEAR_DECIMAL = 26, // %-j - Day of the year as a decimal number. (1, 2, ..., 366) WEEK_NUMBER_PADDED_SUN_FIRST = 27, // %U - Week number of the year (Sunday as the first day of the week). All days in a new year preceding the // first Sunday are considered to be in week 0. (00, 01, ..., 53) WEEK_NUMBER_PADDED_MON_FIRST = 28, // %W - Week number of the year (Monday as the first day of the week). All days in a new year preceding the // first Monday are considered to be in week 0. (00, 01, ..., 53) LOCALE_APPROPRIATE_DATE_AND_TIME = 29, // %c - Locale’s appropriate date and time representation. (Mon Sep 30 07:06:05 2013) LOCALE_APPROPRIATE_DATE = 30, // %x - Locale’s appropriate date representation. (09/30/13) LOCALE_APPROPRIATE_TIME = 31 // %X - Locale’s appropriate time representation. (07:06:05) }; struct StrTimeFormat { public: virtual ~StrTimeFormat() { } DUCKDB_API static string ParseFormatSpecifier(const string &format_string, StrTimeFormat &format); inline bool HasFormatSpecifier(StrTimeSpecifier s) const { return std::find(specifiers.begin(), specifiers.end(), s) != specifiers.end(); } //! The full format specifier, for error messages string format_specifier; protected: //! The format specifiers vector specifiers; //! The literals that appear in between the format specifiers //! The following must hold: literals.size() = specifiers.size() + 1 //! 
Format is literals[0], specifiers[0], literals[1], ..., specifiers[n - 1], literals[n] vector literals; //! The constant size that appears in the format string idx_t constant_size = 0; //! The max numeric width of the specifier (if it is parsed as a number), or -1 if it is not a number vector numeric_width; protected: void AddLiteral(string literal); DUCKDB_API virtual void AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier); }; struct StrfTimeFormat : public StrTimeFormat { DUCKDB_API idx_t GetLength(date_t date, dtime_t time, int32_t utc_offset, const char *tz_name); DUCKDB_API void FormatString(date_t date, int32_t data[8], const char *tz_name, char *target); void FormatString(date_t date, dtime_t time, char *target); DUCKDB_API static string Format(timestamp_t timestamp, const string &format); DUCKDB_API void ConvertDateVector(Vector &input, Vector &result, idx_t count); DUCKDB_API void ConvertTimestampVector(Vector &input, Vector &result, idx_t count); protected: //! The variable-length specifiers. To determine total string size, these need to be checked. vector var_length_specifiers; //! Whether or not the current specifier is a special "date" specifier (i.e. one that requires a date_t object to //! 
generate) vector is_date_specifier; protected: DUCKDB_API void AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier) override; static idx_t GetSpecifierLength(StrTimeSpecifier specifier, date_t date, dtime_t time, int32_t utc_offset, const char *tz_name); char *WriteString(char *target, const string_t &str); char *Write2(char *target, uint8_t value); char *WritePadded2(char *target, uint32_t value); char *WritePadded3(char *target, uint32_t value); char *WritePadded(char *target, uint32_t value, size_t padding); bool IsDateSpecifier(StrTimeSpecifier specifier); char *WriteDateSpecifier(StrTimeSpecifier specifier, date_t date, char *target); char *WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t data[], const char *tz_name, size_t tz_len, char *target); }; struct StrpTimeFormat : public StrTimeFormat { public: //! Type-safe parsing argument struct ParseResult { int32_t data[8]; // year, month, day, hour, min, sec, µs, offset string tz; string error_message; idx_t error_position = DConstants::INVALID_INDEX; date_t ToDate(); timestamp_t ToTimestamp(); bool TryToDate(date_t &result); bool TryToTimestamp(timestamp_t &result); DUCKDB_API string FormatError(string_t input, const string &format_specifier); }; public: DUCKDB_API static ParseResult Parse(const string &format, const string &text); DUCKDB_API bool Parse(string_t str, ParseResult &result); DUCKDB_API bool TryParseDate(string_t str, date_t &result, string &error_message); DUCKDB_API bool TryParseTimestamp(string_t str, timestamp_t &result, string &error_message); date_t ParseDate(string_t str); timestamp_t ParseTimestamp(string_t str); protected: static string FormatStrpTimeError(const string &input, idx_t position); DUCKDB_API void AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier) override; int NumericSpecifierWidth(StrTimeSpecifier specifier); int32_t TryParseCollection(const char *data, idx_t &pos, idx_t size, const string_t collection[], idx_t 
collection_count); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/transaction_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class TransactionType : uint8_t { INVALID, BEGIN_TRANSACTION, COMMIT, ROLLBACK }; struct TransactionInfo : public ParseInfo { explicit TransactionInfo(TransactionType type); //! The type of transaction statement TransactionType type; public: void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/statement/insert_statement.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/statement/update_statement.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class UpdateSetInfo { public: UpdateSetInfo(); public: unique_ptr Copy() const; public: // The condition that needs to be met to perform the update unique_ptr condition; // The columns to update vector columns; // The set expressions to execute vector> expressions; protected: UpdateSetInfo(const UpdateSetInfo &other); }; class UpdateStatement : public SQLStatement { public: static constexpr const StatementType TYPE = StatementType::UPDATE_STATEMENT; public: UpdateStatement(); unique_ptr table; unique_ptr from_table; //! keep track of optional returningList if statement contains a RETURNING keyword vector> returning_list; unique_ptr set_info; //! 
CTEs CommonTableExpressionMap cte_map; protected: UpdateStatement(const UpdateStatement &other); public: string ToString() const override; unique_ptr Copy() const override; }; } // namespace duckdb namespace duckdb { class ExpressionListRef; class UpdateSetInfo; enum class OnConflictAction : uint8_t { THROW, NOTHING, UPDATE, REPLACE // Only used in transform/bind step, changed to UPDATE later }; enum class InsertColumnOrder : uint8_t { INSERT_BY_POSITION = 0, INSERT_BY_NAME = 1 }; class OnConflictInfo { public: OnConflictInfo(); public: unique_ptr Copy() const; public: OnConflictAction action_type; vector indexed_columns; //! The SET information (if action_type == UPDATE) unique_ptr set_info; //! The condition determining whether we apply the DO .. for conflicts that arise unique_ptr condition; protected: OnConflictInfo(const OnConflictInfo &other); }; class InsertStatement : public SQLStatement { public: static constexpr const StatementType TYPE = StatementType::INSERT_STATEMENT; public: InsertStatement(); //! The select statement to insert from unique_ptr select_statement; //! Column names to insert into vector columns; //! Table name to insert to string table; //! Schema name to insert to string schema; //! The catalog name to insert to string catalog; //! keep track of optional returningList if statement contains a RETURNING keyword vector> returning_list; unique_ptr on_conflict_info; unique_ptr table_ref; //! CTEs CommonTableExpressionMap cte_map; //! Whether or not this a DEFAULT VALUES bool default_values = false; //! INSERT BY POSITION or INSERT BY NAME InsertColumnOrder column_order = InsertColumnOrder::INSERT_BY_POSITION; protected: InsertStatement(const InsertStatement &other); public: static string OnConflictActionToString(OnConflictAction action); string ToString() const override; unique_ptr Copy() const override; //! If the INSERT statement is inserted DIRECTLY from a values list (i.e. INSERT INTO tbl VALUES (...)) this returns //! 
the expression list Otherwise, this returns NULL optional_ptr GetValuesList() const; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/expression/window_expression.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class WindowBoundary : uint8_t { INVALID = 0, UNBOUNDED_PRECEDING = 1, UNBOUNDED_FOLLOWING = 2, CURRENT_ROW_RANGE = 3, CURRENT_ROW_ROWS = 4, EXPR_PRECEDING_ROWS = 5, EXPR_FOLLOWING_ROWS = 6, EXPR_PRECEDING_RANGE = 7, EXPR_FOLLOWING_RANGE = 8 }; const char *ToString(WindowBoundary value); //! The WindowExpression represents a window function in the query. They are a special case of aggregates which is why //! they inherit from them. class WindowExpression : public ParsedExpression { public: static constexpr const ExpressionClass TYPE = ExpressionClass::WINDOW; public: WindowExpression(ExpressionType type, string catalog_name, string schema_name, const string &function_name); //! Catalog of the aggregate function string catalog; //! Schema of the aggregate function string schema; //! Name of the aggregate function string function_name; //! The child expression of the main window function vector> children; //! The set of expressions to partition by vector> partitions; //! The set of ordering clauses vector orders; //! Expression representing a filter, only used for aggregates unique_ptr filter_expr; //! True to ignore NULL values bool ignore_nulls; //! The window boundaries WindowBoundary start = WindowBoundary::INVALID; WindowBoundary end = WindowBoundary::INVALID; unique_ptr start_expr; unique_ptr end_expr; //! Offset and default expressions for WINDOW_LEAD and WINDOW_LAG functions unique_ptr offset_expr; unique_ptr default_expr; public: bool IsWindow() const override { return true; } //! 
Convert the Expression to a String string ToString() const override; static bool Equal(const WindowExpression &a, const WindowExpression &b); unique_ptr Copy() const override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(ExpressionType type, FieldReader &source); void FormatSerialize(FormatSerializer &serializer) const override; static unique_ptr FormatDeserialize(ExpressionType type, FormatDeserializer &deserializer); static ExpressionType WindowToExpressionType(string &fun_name); public: template static string ToString(const T &entry, const string &schema, const string &function_name) { // Start with function call string result = schema.empty() ? function_name : schema + "." + function_name; result += "("; if (entry.children.size()) { result += StringUtil::Join(entry.children, entry.children.size(), ", ", [](const unique_ptr &child) { return child->ToString(); }); } // Lead/Lag extra arguments if (entry.offset_expr.get()) { result += ", "; result += entry.offset_expr->ToString(); } if (entry.default_expr.get()) { result += ", "; result += entry.default_expr->ToString(); } // IGNORE NULLS if (entry.ignore_nulls) { result += " IGNORE NULLS"; } // FILTER if (entry.filter_expr) { result += ") FILTER (WHERE " + entry.filter_expr->ToString(); } // Over clause result += ") OVER ("; string sep; // Partitions if (!entry.partitions.empty()) { result += "PARTITION BY "; result += StringUtil::Join(entry.partitions, entry.partitions.size(), ", ", [](const unique_ptr &partition) { return partition->ToString(); }); sep = " "; } // Orders if (!entry.orders.empty()) { result += sep; result += "ORDER BY "; result += StringUtil::Join(entry.orders, entry.orders.size(), ", ", [](const ORDER_NODE &order) { return order.ToString(); }); sep = " "; } // Rows/Range string units = "ROWS"; string from; switch (entry.start) { case WindowBoundary::CURRENT_ROW_RANGE: case WindowBoundary::CURRENT_ROW_ROWS: from = "CURRENT ROW"; units = (entry.start == 
WindowBoundary::CURRENT_ROW_RANGE) ? "RANGE" : "ROWS"; break; case WindowBoundary::UNBOUNDED_PRECEDING: if (entry.end != WindowBoundary::CURRENT_ROW_RANGE) { from = "UNBOUNDED PRECEDING"; } break; case WindowBoundary::EXPR_PRECEDING_ROWS: case WindowBoundary::EXPR_PRECEDING_RANGE: from = entry.start_expr->ToString() + " PRECEDING"; units = (entry.start == WindowBoundary::EXPR_PRECEDING_RANGE) ? "RANGE" : "ROWS"; break; case WindowBoundary::EXPR_FOLLOWING_ROWS: case WindowBoundary::EXPR_FOLLOWING_RANGE: from = entry.start_expr->ToString() + " FOLLOWING"; units = (entry.start == WindowBoundary::EXPR_FOLLOWING_RANGE) ? "RANGE" : "ROWS"; break; default: throw InternalException("Unrecognized FROM in WindowExpression"); } string to; switch (entry.end) { case WindowBoundary::CURRENT_ROW_RANGE: if (entry.start != WindowBoundary::UNBOUNDED_PRECEDING) { to = "CURRENT ROW"; units = "RANGE"; } break; case WindowBoundary::CURRENT_ROW_ROWS: to = "CURRENT ROW"; units = "ROWS"; break; case WindowBoundary::UNBOUNDED_PRECEDING: to = "UNBOUNDED PRECEDING"; break; case WindowBoundary::UNBOUNDED_FOLLOWING: to = "UNBOUNDED FOLLOWING"; break; case WindowBoundary::EXPR_PRECEDING_ROWS: case WindowBoundary::EXPR_PRECEDING_RANGE: to = entry.end_expr->ToString() + " PRECEDING"; units = (entry.end == WindowBoundary::EXPR_PRECEDING_RANGE) ? "RANGE" : "ROWS"; break; case WindowBoundary::EXPR_FOLLOWING_ROWS: case WindowBoundary::EXPR_FOLLOWING_RANGE: to = entry.end_expr->ToString() + " FOLLOWING"; units = (entry.end == WindowBoundary::EXPR_FOLLOWING_RANGE) ? 
"RANGE" : "ROWS"; break; default: throw InternalException("Unrecognized TO in WindowExpression"); } if (!from.empty() || !to.empty()) { result += sep + units; } if (!from.empty() && !to.empty()) { result += " BETWEEN "; result += from; result += " AND "; result += to; } else if (!from.empty()) { result += " "; result += from; } else if (!to.empty()) { result += " "; result += to; } result += ")"; return result; } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/magic_bytes.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class FileSystem; enum class DataFileType : uint8_t { FILE_DOES_NOT_EXIST, // file does not exist DUCKDB_FILE, // duckdb database file SQLITE_FILE, // sqlite database file PARQUET_FILE // parquet file }; class MagicBytes { public: static DataFileType CheckMagicBytes(FileSystem *fs, const string &path); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/verification/statement_verifier.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class VerificationType : uint8_t { ORIGINAL, COPIED, DESERIALIZED, DESERIALIZED_V2, PARSED, UNOPTIMIZED, NO_OPERATOR_CACHING, PREPARED, EXTERNAL, INVALID }; class StatementVerifier { public: StatementVerifier(VerificationType type, string name, unique_ptr statement_p); explicit StatementVerifier(unique_ptr statement_p); static unique_ptr Create(VerificationType type, const SQLStatement &statement_p); virtual ~StatementVerifier() noexcept; //! Check whether expressions in this verifier and the other verifier match void CheckExpressions(const StatementVerifier &other) const; //! Check whether expressions within this verifier match void CheckExpressions() const; //! 
Run the select statement and store the result virtual bool Run(ClientContext &context, const string &query, const std::function(const string &, unique_ptr)> &run); //! Compare this verifier's results with another verifier string CompareResults(const StatementVerifier &other); public: const VerificationType type; const string name; unique_ptr statement; const vector> &select_list; unique_ptr materialized_result; virtual bool RequireEquality() const { return true; } virtual bool DisableOptimizer() const { return false; } virtual bool DisableOperatorCaching() const { return false; } virtual bool ForceExternal() const { return false; } }; } // namespace duckdb namespace duckdb { class Index; class ConflictInfo; enum class ConflictManagerMode : uint8_t { SCAN, // gather conflicts without throwing THROW // throw on the conflicts that were not found during the scan }; enum class LookupResultType : uint8_t { LOOKUP_MISS, LOOKUP_HIT, LOOKUP_NULL }; class ConflictManager { public: ConflictManager(VerifyExistenceType lookup_type, idx_t input_size, optional_ptr conflict_info = nullptr); public: void SetIndexCount(idx_t count); // These methods return a boolean indicating whether we should throw or not bool AddMiss(idx_t chunk_index); bool AddHit(idx_t chunk_index, row_t row_id); bool AddNull(idx_t chunk_index); VerifyExistenceType LookupType() const; // This should be called before using the conflicts selection vector void Finalize(); idx_t ConflictCount() const; const ManagedSelection &Conflicts() const; Vector &RowIds(); const ConflictInfo &GetConflictInfo() const; void FinishLookup(); void SetMode(ConflictManagerMode mode); private: bool IsConflict(LookupResultType type); const unordered_set &InternalConflictSet() const; Vector &InternalRowIds(); Vector &InternalIntermediate(); ManagedSelection &InternalSelection(); bool SingleIndexTarget() const; bool ShouldThrow(idx_t chunk_index) const; bool ShouldIgnoreNulls() const; void AddConflictInternal(idx_t chunk_index, row_t 
row_id); void AddToConflictSet(idx_t chunk_index); private: VerifyExistenceType lookup_type; idx_t input_size; optional_ptr conflict_info; idx_t index_count; bool finalized = false; ManagedSelection conflicts; unique_ptr row_ids; // Used to check if a given conflict is part of the conflict target or not unique_ptr> conflict_set; // Contains 'input_size' booleans, indicating if a given index in the input chunk has a conflict unique_ptr intermediate_vector; // Mapping from chunk_index to row_id vector row_id_map; // Whether we have already found the one conflict target we're interested in bool single_index_finished = false; ConflictManagerMode mode; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/column/partitioned_column_data.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/string_map_set.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct PerfectHash { inline std::size_t operator()(const idx_t &h) const { return h; } }; struct PerfectEquality { inline bool operator()(const idx_t &a, const idx_t &b) const { return a == b; } }; template using perfect_map_t = unordered_map; using perfect_set_t = unordered_set; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/column/column_data_allocator.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct ChunkMetaData; struct VectorMetaData; struct BlockMetaData { //! The underlying block handle shared_ptr handle; //! How much space is currently used within the block uint32_t size; //! 
How much space is available in the block uint32_t capacity; uint32_t Capacity(); }; class ColumnDataAllocator { public: explicit ColumnDataAllocator(Allocator &allocator); explicit ColumnDataAllocator(BufferManager &buffer_manager); ColumnDataAllocator(ClientContext &context, ColumnDataAllocatorType allocator_type); ColumnDataAllocator(ColumnDataAllocator &allocator); //! Returns an allocator object to allocate with. This returns the allocator in IN_MEMORY_ALLOCATOR, and a buffer //! allocator in case of BUFFER_MANAGER_ALLOCATOR. Allocator &GetAllocator(); //! Returns the allocator type ColumnDataAllocatorType GetType() { return type; } void MakeShared() { shared = true; } idx_t BlockCount() const { return blocks.size(); } public: void AllocateData(idx_t size, uint32_t &block_id, uint32_t &offset, ChunkManagementState *chunk_state); void Initialize(ColumnDataAllocator &other); void InitializeChunkState(ChunkManagementState &state, ChunkMetaData &meta_data); data_ptr_t GetDataPointer(ChunkManagementState &state, uint32_t block_id, uint32_t offset); void UnswizzlePointers(ChunkManagementState &state, Vector &result, uint16_t v_offset, uint16_t count, uint32_t block_id, uint32_t offset); //! Deletes the block with the given id void DeleteBlock(uint32_t block_id); private: void AllocateEmptyBlock(idx_t size); BufferHandle AllocateBlock(idx_t size); BufferHandle Pin(uint32_t block_id); bool HasBlocks() const { return !blocks.empty(); } private: void AllocateBuffer(idx_t size, uint32_t &block_id, uint32_t &offset, ChunkManagementState *chunk_state); void AllocateMemory(idx_t size, uint32_t &block_id, uint32_t &offset, ChunkManagementState *chunk_state); void AssignPointer(uint32_t &block_id, uint32_t &offset, data_ptr_t pointer); private: ColumnDataAllocatorType type; union { //! The allocator object (if this is a IN_MEMORY_ALLOCATOR) Allocator *allocator; //! The buffer manager (if this is a BUFFER_MANAGER_ALLOCATOR) BufferManager *buffer_manager; } alloc; //! 
The set of blocks used by the column data collection vector blocks; //! The set of allocated data vector allocated_data; //! Whether this ColumnDataAllocator is shared across ColumnDataCollections that allocate in parallel bool shared = false; //! Lock used in case this ColumnDataAllocator is shared across threads mutex lock; }; } // namespace duckdb namespace duckdb { //! Local state for parallel partitioning struct PartitionedColumnDataAppendState { public: PartitionedColumnDataAppendState() : partition_indices(LogicalType::UBIGINT) { } public: Vector partition_indices; SelectionVector partition_sel; perfect_map_t partition_entries; DataChunk slice_chunk; vector> partition_buffers; vector> partition_append_states; }; enum class PartitionedColumnDataType : uint8_t { INVALID, //! Radix partitioning on a hash column RADIX, //! Hive-style multi-field partitioning HIVE }; //! Shared allocators for parallel partitioning struct PartitionColumnDataAllocators { mutex lock; vector> allocators; }; //! PartitionedColumnData represents partitioned columnar data, which serves as an interface for different types of //! partitioning, e.g., radix, hive class PartitionedColumnData { public: unique_ptr CreateShared(); virtual ~PartitionedColumnData(); public: //! Initializes a local state for parallel partitioning that can be merged into this PartitionedColumnData void InitializeAppendState(PartitionedColumnDataAppendState &state) const; //! Appends a DataChunk to this PartitionedColumnData void Append(PartitionedColumnDataAppendState &state, DataChunk &input); //! Flushes any remaining data in the append state into this PartitionedColumnData void FlushAppendState(PartitionedColumnDataAppendState &state); //! Combine another PartitionedColumnData into this PartitionedColumnData void Combine(PartitionedColumnData &other); //! 
Get the partitions in this PartitionedColumnData vector> &GetPartitions(); protected: //===--------------------------------------------------------------------===// // Partitioning type implementation interface //===--------------------------------------------------------------------===// //! Size of the buffers in the append states for this type of partitioning (default 128) virtual idx_t BufferSize() const { return MinValue(128, STANDARD_VECTOR_SIZE); } //! Initialize a PartitionedColumnDataAppendState for this type of partitioning (optional) virtual void InitializeAppendStateInternal(PartitionedColumnDataAppendState &state) const { } //! Compute the partition indices for this type of partitioning for the input DataChunk and store them in the //! `partition_data` of the local state. If this type creates partitions on the fly (for, e.g., hive), this //! function is also in charge of creating new partitions and mapping the input data to a partition index virtual void ComputePartitionIndices(PartitionedColumnDataAppendState &state, DataChunk &input) { throw NotImplementedException("ComputePartitionIndices for this type of PartitionedColumnData"); } protected: //! PartitionedColumnData can only be instantiated by derived classes PartitionedColumnData(PartitionedColumnDataType type, ClientContext &context, vector types); PartitionedColumnData(const PartitionedColumnData &other); //! If the buffer is half full, we append to the partition inline idx_t HalfBufferSize() const { D_ASSERT(IsPowerOfTwo(BufferSize())); return BufferSize() / 2; } //! Create a new shared allocator void CreateAllocator(); //! Create a collection for a specific a partition unique_ptr CreatePartitionCollection(idx_t partition_index) const { return make_uniq(allocators->allocators[partition_index], types); } //! 
Create a DataChunk used for buffering appends to the partition unique_ptr CreatePartitionBuffer() const; protected: PartitionedColumnDataType type; ClientContext &context; vector types; mutex lock; shared_ptr allocators; vector> partitions; public: template TARGET &Cast() { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } template const TARGET &Cast() const { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/row/partitioned_tuple_data.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/row/tuple_data_allocator.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/row/tuple_data_layout.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class TupleDataLayout { public: using Aggregates = vector; using ValidityBytes = TemplatedValidityMask; //! Creates an empty TupleDataLayout TupleDataLayout(); //! Create a copy of this TupleDataLayout TupleDataLayout Copy() const; public: //! Initializes the TupleDataLayout with the specified types and aggregates to an empty TupleDataLayout void Initialize(vector types_p, Aggregates aggregates_p, bool align = true, bool heap_offset = true); //! Initializes the TupleDataLayout with the specified types to an empty TupleDataLayout void Initialize(vector types, bool align = true, bool heap_offset = true); //! Initializes the TupleDataLayout with the specified aggregates to an empty TupleDataLayout void Initialize(Aggregates aggregates_p, bool align = true, bool heap_offset = true); //! 
Returns the number of data columns inline idx_t ColumnCount() const { return types.size(); } //! Returns a list of the column types for this data chunk inline const vector &GetTypes() const { return types; } //! Returns the number of aggregates inline idx_t AggregateCount() const { return aggregates.size(); } //! Returns a list of the aggregates for this data chunk inline Aggregates &GetAggregates() { return aggregates; } //! Returns a map from column id to the struct TupleDataLayout const inline TupleDataLayout &GetStructLayout(idx_t col_idx) const { D_ASSERT(struct_layouts->find(col_idx) != struct_layouts->end()); return struct_layouts->find(col_idx)->second; } //! Returns the total width required for each row, including padding inline idx_t GetRowWidth() const { return row_width; } //! Returns the offset to the start of the data inline idx_t GetDataOffset() const { return flag_width; } //! Returns the total width required for the data, including padding inline idx_t GetDataWidth() const { return data_width; } //! Returns the offset to the start of the aggregates inline idx_t GetAggrOffset() const { return flag_width + data_width; } //! Returns the total width required for the aggregates, including padding inline idx_t GetAggrWidth() const { return aggr_width; } //! Returns the column offsets into each row inline const vector &GetOffsets() const { return offsets; } //! Returns whether all columns in this layout are constant size inline bool AllConstant() const { return all_constant; } inline idx_t GetHeapSizeOffset() const { return heap_size_offset; } private: //! The types of the data columns vector types; //! The aggregate functions Aggregates aggregates; //! Structs are a recursive TupleDataLayout unique_ptr> struct_layouts; //! The width of the validity header idx_t flag_width; //! The width of the data portion idx_t data_width; //! The width of the aggregate state portion idx_t aggr_width; //! The width of the entire row idx_t row_width; //! 
The offsets to the columns and aggregate data in each row vector offsets; //! Whether all columns in this layout are constant size bool all_constant; //! Offset to the heap size of every row idx_t heap_size_offset; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/row/tuple_data_states.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class TupleDataPinProperties : uint8_t { INVALID, //! Keeps all passed blocks pinned while scanning/iterating over the chunks (for both reading/writing) KEEP_EVERYTHING_PINNED, //! Unpins blocks after they are done (for both reading/writing) UNPIN_AFTER_DONE, //! Destroys blocks after they are done (for reading only) DESTROY_AFTER_DONE, //! Assumes all blocks are already pinned (for reading only) ALREADY_PINNED }; struct TupleDataPinState { unordered_map row_handles; unordered_map heap_handles; TupleDataPinProperties properties = TupleDataPinProperties::INVALID; }; struct CombinedListData { UnifiedVectorFormat combined_data; list_entry_t combined_list_entries[STANDARD_VECTOR_SIZE]; buffer_ptr selection_data; }; struct TupleDataVectorFormat { UnifiedVectorFormat data; vector child_formats; unique_ptr combined_list_data; }; struct TupleDataChunkState { vector vector_data; vector column_ids; Vector row_locations = Vector(LogicalType::POINTER); Vector heap_locations = Vector(LogicalType::POINTER); Vector heap_sizes = Vector(LogicalType::UBIGINT); }; struct TupleDataAppendState { TupleDataPinState pin_state; TupleDataChunkState chunk_state; }; struct TupleDataScanState { TupleDataPinState pin_state; TupleDataChunkState chunk_state; idx_t segment_index = DConstants::INVALID_INDEX; idx_t chunk_index = DConstants::INVALID_INDEX; }; struct TupleDataParallelScanState { TupleDataScanState scan_state; mutex lock; }; using TupleDataLocalScanState = TupleDataScanState; } // namespace duckdb 
namespace duckdb { struct TupleDataSegment; struct TupleDataChunk; struct TupleDataChunkPart; struct TupleDataBlock { public: TupleDataBlock(BufferManager &buffer_manager, idx_t capacity_p); //! Disable copy constructors TupleDataBlock(const TupleDataBlock &other) = delete; TupleDataBlock &operator=(const TupleDataBlock &) = delete; //! Enable move constructors TupleDataBlock(TupleDataBlock &&other) noexcept; TupleDataBlock &operator=(TupleDataBlock &&) noexcept; public: //! Remaining capacity (in bytes) idx_t RemainingCapacity() const { D_ASSERT(size <= capacity); return capacity - size; } //! Remaining capacity (in rows) idx_t RemainingCapacity(idx_t row_width) const { return RemainingCapacity() / row_width; } public: //! The underlying row block shared_ptr handle; //! Capacity (in bytes) idx_t capacity; //! Occupied size (in bytes) idx_t size; }; class TupleDataAllocator { public: TupleDataAllocator(BufferManager &buffer_manager, const TupleDataLayout &layout); TupleDataAllocator(TupleDataAllocator &allocator); //! Get the buffer allocator Allocator &GetAllocator(); //! Get the layout const TupleDataLayout &GetLayout() const; //! Number of row blocks idx_t RowBlockCount() const; //! Number of heap blocks idx_t HeapBlockCount() const; public: //! Builds out the chunks for next append, given the metadata in the append state void Build(TupleDataSegment &segment, TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, const idx_t append_offset, const idx_t append_count); //! Initializes a chunk, making its pointers valid void InitializeChunkState(TupleDataSegment &segment, TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, idx_t chunk_idx, bool init_heap); static void RecomputeHeapPointers(Vector &old_heap_ptrs, const SelectionVector &old_heap_sel, const data_ptr_t row_locations[], Vector &new_heap_ptrs, const idx_t offset, const idx_t count, const TupleDataLayout &layout, const idx_t base_col_offset); //! 
Releases or stores any handles in the management state that are no longer required void ReleaseOrStoreHandles(TupleDataPinState &state, TupleDataSegment &segment, TupleDataChunk &chunk, bool release_heap); //! Releases or stores ALL handles in the management state void ReleaseOrStoreHandles(TupleDataPinState &state, TupleDataSegment &segment); private: //! Builds out a single part (grabs the lock) TupleDataChunkPart BuildChunkPart(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, const idx_t append_offset, const idx_t append_count); //! Internal function for InitializeChunkState void InitializeChunkStateInternal(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, idx_t offset, bool recompute, bool init_heap_pointers, bool init_heap_sizes, vector &parts); //! Internal function for ReleaseOrStoreHandles static void ReleaseOrStoreHandlesInternal(TupleDataSegment &segment, vector &pinned_row_handles, unordered_map &handles, const unordered_set &block_ids, vector &blocks, TupleDataPinProperties properties); //! Pins the given row block BufferHandle &PinRowBlock(TupleDataPinState &state, const TupleDataChunkPart &part); //! Pins the given heap block BufferHandle &PinHeapBlock(TupleDataPinState &state, const TupleDataChunkPart &part); //! Gets the pointer to the rows for the given chunk part data_ptr_t GetRowPointer(TupleDataPinState &state, const TupleDataChunkPart &part); //! Gets the base pointer to the heap for the given chunk part data_ptr_t GetBaseHeapPointer(TupleDataPinState &state, const TupleDataChunkPart &part); private: //! The buffer manager BufferManager &buffer_manager; //! The layout of the data const TupleDataLayout layout; //! Blocks storing the fixed-size rows vector row_blocks; //! 
Blocks storing the variable-size data of the fixed-size rows (e.g., string, list) vector heap_blocks; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/row/tuple_data_collection.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/row/tuple_data_segment.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class TupleDataAllocator; class TupleDataLayout; struct TupleDataChunkPart { public: TupleDataChunkPart(); //! Disable copy constructors TupleDataChunkPart(const TupleDataChunkPart &other) = delete; TupleDataChunkPart &operator=(const TupleDataChunkPart &) = delete; //! Enable move constructors TupleDataChunkPart(TupleDataChunkPart &&other) noexcept; TupleDataChunkPart &operator=(TupleDataChunkPart &&) noexcept; static constexpr const uint32_t INVALID_INDEX = (uint32_t)-1; public: //! Index/offset of the row block uint32_t row_block_index; uint32_t row_block_offset; //! Pointer/index/offset of the heap block uint32_t heap_block_index; uint32_t heap_block_offset; data_ptr_t base_heap_ptr; //! Total heap size for this chunk part uint32_t total_heap_size; //! Tuple count for this chunk part uint32_t count; //! Lock for recomputing heap pointers mutex lock; }; struct TupleDataChunk { public: TupleDataChunk(); //! Disable copy constructors TupleDataChunk(const TupleDataChunk &other) = delete; TupleDataChunk &operator=(const TupleDataChunk &) = delete; //! Enable move constructors TupleDataChunk(TupleDataChunk &&other) noexcept; TupleDataChunk &operator=(TupleDataChunk &&) noexcept; //! Add a part to this chunk void AddPart(TupleDataChunkPart &&part, const TupleDataLayout &layout); //! 
Tries to merge the last chunk part into the second-to-last one void MergeLastChunkPart(const TupleDataLayout &layout); //! Verify counts of the parts in this chunk void Verify() const; public: //! The parts of this chunk vector parts; //! The row block ids referenced by the chunk unordered_set row_block_ids; //! The heap block ids referenced by the chunk unordered_set heap_block_ids; //! Tuple count for this chunk idx_t count; }; struct TupleDataSegment { public: explicit TupleDataSegment(shared_ptr allocator); ~TupleDataSegment(); //! Disable copy constructors TupleDataSegment(const TupleDataSegment &other) = delete; TupleDataSegment &operator=(const TupleDataSegment &) = delete; //! Enable move constructors TupleDataSegment(TupleDataSegment &&other) noexcept; TupleDataSegment &operator=(TupleDataSegment &&) noexcept; //! The number of chunks in this segment idx_t ChunkCount() const; //! The size (in bytes) of this segment idx_t SizeInBytes() const; //! Unpins all held pins void Unpin(); //! Verify counts of the chunks in this segment void Verify() const; //! Verify that all blocks in this segment are pinned void VerifyEverythingPinned() const; public: //! The allocator for this segment shared_ptr allocator; //! The chunks of this segment unsafe_vector chunks; //! The tuple count of this segment idx_t count; //! Lock for modifying pinned_handles mutex pinned_handles_lock; //! Where handles to row blocks will be stored with TupleDataPinProperties::KEEP_EVERYTHING_PINNED vector pinned_row_handles; //! 
Where handles to heap blocks will be stored with TupleDataPinProperties::KEEP_EVERYTHING_PINNED vector pinned_heap_handles; }; } // namespace duckdb namespace duckdb { class TupleDataAllocator; struct TupleDataScatterFunction; struct TupleDataGatherFunction; typedef void (*tuple_data_scatter_function_t)(const Vector &source, const TupleDataVectorFormat &source_format, const SelectionVector &append_sel, const idx_t append_count, const TupleDataLayout &layout, Vector &row_locations, Vector &heap_locations, const idx_t col_idx, const UnifiedVectorFormat &list_format, const vector &child_functions); struct TupleDataScatterFunction { tuple_data_scatter_function_t function; vector child_functions; }; typedef void (*tuple_data_gather_function_t)(const TupleDataLayout &layout, Vector &row_locations, const idx_t col_idx, const SelectionVector &scan_sel, const idx_t scan_count, Vector &target, const SelectionVector &target_sel, Vector &list_vector, const vector &child_functions); struct TupleDataGatherFunction { tuple_data_gather_function_t function; vector child_functions; }; //! TupleDataCollection represents a set of buffer-managed data stored in row format //! FIXME: rename to RowDataCollection after we phase it out class TupleDataCollection { friend class TupleDataChunkIterator; public: //! Constructs a TupleDataCollection with the specified layout TupleDataCollection(BufferManager &buffer_manager, const TupleDataLayout &layout); //! Constructs a TupleDataCollection with the same (shared) allocator explicit TupleDataCollection(shared_ptr allocator); ~TupleDataCollection(); public: //! The layout of the stored rows const TupleDataLayout &GetLayout() const; //! The number of rows stored in the tuple data collection const idx_t &Count() const; //! The number of chunks stored in the tuple data collection idx_t ChunkCount() const; //! The size (in bytes) of the blocks held by this tuple data collection idx_t SizeInBytes() const; //! 
Get pointers to the pinned blocks void GetBlockPointers(vector &block_pointers) const; //! Unpins all held pins void Unpin(); //! Gets the scatter function for the given type static TupleDataScatterFunction GetScatterFunction(const LogicalType &type, bool within_list = false); //! Gets the gather function for the given type static TupleDataGatherFunction GetGatherFunction(const LogicalType &type, bool within_list = false); //! Initializes an Append state - useful for optimizing many appends made to the same tuple data collection void InitializeAppend(TupleDataAppendState &append_state, TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE); //! Initializes an Append state - useful for optimizing many appends made to the same tuple data collection void InitializeAppend(TupleDataAppendState &append_state, vector column_ids, TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE); //! Initializes the Pin state of an Append state //! - Useful for optimizing many appends made to the same tuple data collection void InitializeAppend(TupleDataPinState &pin_state, TupleDataPinProperties = TupleDataPinProperties::UNPIN_AFTER_DONE); //! Initializes the Chunk state of an Append state //! - Useful for optimizing many appends made to the same tuple data collection void InitializeAppend(TupleDataChunkState &chunk_state, vector column_ids = {}); //! Append a DataChunk directly to this TupleDataCollection - calls InitializeAppend and Append internally void Append(DataChunk &new_chunk, const SelectionVector &append_sel = *FlatVector::IncrementalSelectionVector(), idx_t append_count = DConstants::INVALID_INDEX); //! Append a DataChunk directly to this TupleDataCollection - calls InitializeAppend and Append internally void Append(DataChunk &new_chunk, vector column_ids, const SelectionVector &append_sel = *FlatVector::IncrementalSelectionVector(), const idx_t append_count = DConstants::INVALID_INDEX); //! 
Append a DataChunk to this TupleDataCollection using the specified Append state void Append(TupleDataAppendState &append_state, DataChunk &new_chunk, const SelectionVector &append_sel = *FlatVector::IncrementalSelectionVector(), const idx_t append_count = DConstants::INVALID_INDEX); //! Append a DataChunk to this TupleDataCollection using the specified pin and Chunk states void Append(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, DataChunk &new_chunk, const SelectionVector &append_sel = *FlatVector::IncrementalSelectionVector(), const idx_t append_count = DConstants::INVALID_INDEX); //! Append a DataChunk to this TupleDataCollection using the specified pin and Chunk states //! - ToUnifiedFormat has already been called void AppendUnified(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, DataChunk &new_chunk, const SelectionVector &append_sel = *FlatVector::IncrementalSelectionVector(), const idx_t append_count = DConstants::INVALID_INDEX); //! Creates a UnifiedVectorFormat in the given Chunk state for the given DataChunk static void ToUnifiedFormat(TupleDataChunkState &chunk_state, DataChunk &new_chunk); //! Gets the UnifiedVectorFormat from the Chunk state as an array static void GetVectorData(const TupleDataChunkState &chunk_state, UnifiedVectorFormat result[]); //! Computes the heap sizes for the new DataChunk that will be appended static void ComputeHeapSizes(TupleDataChunkState &chunk_state, const DataChunk &new_chunk, const SelectionVector &append_sel, const idx_t append_count); //! Builds out the buffer space for the specified Chunk state void Build(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, const idx_t append_offset, const idx_t append_count); //! Scatters the given DataChunk to the rows in the specified Chunk state void Scatter(TupleDataChunkState &chunk_state, const DataChunk &new_chunk, const SelectionVector &append_sel, const idx_t append_count) const; //! 
Scatters the given Vector to the given column id to the rows in the specified Chunk state void Scatter(TupleDataChunkState &chunk_state, const Vector &source, const column_t column_id, const SelectionVector &append_sel, const idx_t append_count) const; //! Copy rows from input to the built Chunk state void CopyRows(TupleDataChunkState &chunk_state, TupleDataChunkState &input, const SelectionVector &append_sel, const idx_t append_count) const; //! Finalizes the Pin state, releasing or storing blocks void FinalizePinState(TupleDataPinState &pin_state, TupleDataSegment &segment); //! Finalizes the Pin state, releasing or storing blocks void FinalizePinState(TupleDataPinState &pin_state); //! Appends the other TupleDataCollection to this, destroying the other data collection void Combine(TupleDataCollection &other); //! Appends the other TupleDataCollection to this, destroying the other data collection void Combine(unique_ptr other); //! Resets the TupleDataCollection, clearing all data void Reset(); //! Initializes a chunk with the correct types that can be used to call Append/Scan void InitializeChunk(DataChunk &chunk) const; //! Initializes a chunk with the correct types for a given scan state void InitializeScanChunk(TupleDataScanState &state, DataChunk &chunk) const; //! Initializes a Scan state for scanning all columns void InitializeScan(TupleDataScanState &state, TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE) const; //! Initializes a Scan state for scanning a subset of the columns void InitializeScan(TupleDataScanState &state, vector column_ids, TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE) const; //! Initialize a parallel scan over the tuple data collection over all columns void InitializeScan(TupleDataParallelScanState &state, TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE) const; //! 
Initialize a parallel scan over the tuple data collection over a subset of the columns void InitializeScan(TupleDataParallelScanState &gstate, vector column_ids, TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE) const; //! Scans a DataChunk from the TupleDataCollection bool Scan(TupleDataScanState &state, DataChunk &result); //! Scans a DataChunk from the TupleDataCollection bool Scan(TupleDataParallelScanState &gstate, TupleDataLocalScanState &lstate, DataChunk &result); //! Gathers a DataChunk from the TupleDataCollection, given the specific row locations (requires full pin) void Gather(Vector &row_locations, const SelectionVector &scan_sel, const idx_t scan_count, DataChunk &result, const SelectionVector &target_sel) const; //! Gathers a DataChunk (only the columns given by column_ids) from the TupleDataCollection, //! given the specific row locations (requires full pin) void Gather(Vector &row_locations, const SelectionVector &scan_sel, const idx_t scan_count, const vector &column_ids, DataChunk &result, const SelectionVector &target_sel) const; //! Gathers a Vector (from the given column id) from the TupleDataCollection //! given the specific row locations (requires full pin) void Gather(Vector &row_locations, const SelectionVector &sel, const idx_t scan_count, const column_t column_id, Vector &result, const SelectionVector &target_sel) const; //! Converts this TupleDataCollection to a string representation string ToString(); //! Prints the string representation of this TupleDataCollection void Print(); //! Verify that all blocks are pinned void VerifyEverythingPinned() const; private: //! Initializes the TupleDataCollection (called by the constructor) void Initialize(); //! Gets all column ids void GetAllColumnIDs(vector &column_ids); //! 
Computes the heap sizes for the specific Vector that will be appended static void ComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v, TupleDataVectorFormat &source, const SelectionVector &append_sel, const idx_t append_count); //! Computes the heap sizes for the specific Vector that will be appended (within a list) static void WithinListHeapComputeSizes(Vector &heap_sizes_v, const Vector &source_v, TupleDataVectorFormat &source_format, const SelectionVector &append_sel, const idx_t append_count, const UnifiedVectorFormat &list_data); //! Computes the heap sizes for the fixed-size type Vector that will be appended (within a list) static void ComputeFixedWithinListHeapSizes(Vector &heap_sizes_v, const Vector &source_v, TupleDataVectorFormat &source_format, const SelectionVector &append_sel, const idx_t append_count, const UnifiedVectorFormat &list_data); //! Computes the heap sizes for the string Vector that will be appended (within a list) static void StringWithinListComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v, TupleDataVectorFormat &source_format, const SelectionVector &append_sel, const idx_t append_count, const UnifiedVectorFormat &list_data); //! Computes the heap sizes for the struct Vector that will be appended (within a list) static void StructWithinListComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v, TupleDataVectorFormat &source_format, const SelectionVector &append_sel, const idx_t append_count, const UnifiedVectorFormat &list_data); //! Computes the heap sizes for the list Vector that will be appended (within a list) static void ListWithinListComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v, TupleDataVectorFormat &source_format, const SelectionVector &append_sel, const idx_t append_count, const UnifiedVectorFormat &list_data); //! Get the next segment/chunk index for the scan bool NextScanIndex(TupleDataScanState &scan_state, idx_t &segment_index, idx_t &chunk_index); //! 
Scans the chunk at the given segment/chunk indices void ScanAtIndex(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, const vector &column_ids, idx_t segment_index, idx_t chunk_index, DataChunk &result); //! Verify counts of the segments in this collection void Verify() const; private: //! The layout of the TupleDataCollection const TupleDataLayout layout; //! The TupleDataAllocator shared_ptr allocator; //! The number of entries stored in the TupleDataCollection idx_t count; //! The data segments of the TupleDataCollection unsafe_vector segments; //! The set of scatter functions vector scatter_functions; //! The set of gather functions vector gather_functions; }; } // namespace duckdb namespace duckdb { //! Local state for parallel partitioning struct PartitionedTupleDataAppendState { public: PartitionedTupleDataAppendState() : partition_indices(LogicalType::UBIGINT) { } public: Vector partition_indices; SelectionVector partition_sel; static constexpr idx_t MAP_THRESHOLD = 32; perfect_map_t partition_entries; list_entry_t partition_entries_arr[MAP_THRESHOLD]; vector> partition_pin_states; TupleDataChunkState chunk_state; }; enum class PartitionedTupleDataType : uint8_t { INVALID, //! Radix partitioning on a hash column RADIX }; //! Shared allocators for parallel partitioning struct PartitionTupleDataAllocators { mutex lock; vector> allocators; }; //! PartitionedTupleData represents partitioned row data, which serves as an interface for different types of //! partitioning, e.g., radix, hive class PartitionedTupleData { public: unique_ptr CreateShared(); virtual ~PartitionedTupleData(); public: //! Get the partitioning type of this PartitionedTupleData PartitionedTupleDataType GetType() const; //! Initializes a local state for parallel partitioning that can be merged into this PartitionedTupleData void InitializeAppendState(PartitionedTupleDataAppendState &state, TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE) const; //! 
Appends a DataChunk to this PartitionedTupleData void Append(PartitionedTupleDataAppendState &state, DataChunk &input); //! Appends rows to this PartitionedTupleData void Append(PartitionedTupleDataAppendState &state, TupleDataChunkState &input, idx_t count); //! Flushes any remaining data in the append state into this PartitionedTupleData void FlushAppendState(PartitionedTupleDataAppendState &state); //! Combine another PartitionedTupleData into this PartitionedTupleData void Combine(PartitionedTupleData &other); //! Partition a TupleDataCollection void Partition(TupleDataCollection &source, TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE); //! Repartition this PartitionedTupleData into the new PartitionedTupleData void Repartition(PartitionedTupleData &new_partitioned_data); //! Get the partitions in this PartitionedTupleData vector> &GetPartitions(); //! Get the count of this PartitionedTupleData idx_t Count() const; //! Get the size (in bytes) of this PartitionedTupleData idx_t SizeInBytes() const; protected: //===--------------------------------------------------------------------===// // Partitioning type implementation interface //===--------------------------------------------------------------------===// //! Initialize a PartitionedTupleDataAppendState for this type of partitioning (optional) virtual void InitializeAppendStateInternal(PartitionedTupleDataAppendState &state, TupleDataPinProperties properties) const { } //! Compute the partition indices for this type of partitioning for the input DataChunk and store them in the //! `partition_data` of the local state. If this type creates partitions on the fly (for, e.g., hive), this //! 
//! function is also in charge of creating new partitions and mapping the input data to a partition index
	//! The base implementation always throws NotImplementedException
	virtual void ComputePartitionIndices(PartitionedTupleDataAppendState &state, DataChunk &input) {
		throw NotImplementedException("ComputePartitionIndices for this type of PartitionedTupleData");
	}
	//! Compute partition indices from rows (similar to function above)
	//! The base implementation always throws NotImplementedException
	virtual void ComputePartitionIndices(Vector &row_locations, idx_t count, Vector &partition_indices) const {
		throw NotImplementedException("ComputePartitionIndices for this type of PartitionedTupleData");
	}
	//! Maximum partition index (optional); the default returns DConstants::INVALID_INDEX
	virtual idx_t MaxPartitionIndex() const {
		return DConstants::INVALID_INDEX;
	}
	//! Whether or not to iterate over the original partitions in reverse order when repartitioning (optional);
	//! the default is false
	virtual bool RepartitionReverseOrder() const {
		return false;
	}
	//! Finalize states while repartitioning - useful for unpinning blocks that are no longer needed (optional);
	//! the default is a no-op
	virtual void RepartitionFinalizeStates(PartitionedTupleData &old_partitioned_data,
	                                       PartitionedTupleData &new_partitioned_data,
	                                       PartitionedTupleDataAppendState &state,
	                                       idx_t finished_partition_idx) const {
	}

protected:
	//! PartitionedTupleData can only be instantiated by derived classes
	PartitionedTupleData(PartitionedTupleDataType type, BufferManager &buffer_manager, const TupleDataLayout &layout);
	PartitionedTupleData(const PartitionedTupleData &other);
	//! Create a new shared allocator
	void CreateAllocator();
	//! Builds a selection vector in the Append state for the partitions
	// NOTE(review): this comment used to promise "returns true if everything belongs to the same partition - stores
	// partition index in single_partition_idx", but the function returns void and takes no such parameter.
	void BuildPartitionSel(PartitionedTupleDataAppendState &state, idx_t count);
	//! Builds out the buffer space in the partitions
	void BuildBufferSpace(PartitionedTupleDataAppendState &state);
Create a collection for a specific a partition unique_ptr CreatePartitionCollection(idx_t partition_index) const { return make_uniq(allocators->allocators[partition_index]); } protected: PartitionedTupleDataType type; BufferManager &buffer_manager; const TupleDataLayout layout; mutex lock; shared_ptr allocators; vector> partitions; public: template TARGET &Cast() { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } template const TARGET &Cast() const { D_ASSERT(dynamic_cast(this)); return reinterpret_cast(*this); } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/sort/partition_state.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/radix_partitioning.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class BufferManager; class Vector; struct UnifiedVectorFormat; struct SelectionVector; //! Generic radix partitioning functions struct RadixPartitioning { public: //! The number of partitions for a given number of radix bits static inline constexpr idx_t NumberOfPartitions(idx_t radix_bits) { return idx_t(1) << radix_bits; } //! Inverse of NumberOfPartitions, given a number of partitions, get the number of radix bits static inline idx_t RadixBits(idx_t n_partitions) { D_ASSERT(IsPowerOfTwo(n_partitions)); for (idx_t r = 0; r < sizeof(idx_t) * 8; r++) { if (n_partitions == NumberOfPartitions(r)) { return r; } } throw InternalException("RadixPartitioning::RadixBits unable to find partition count!"); } static inline constexpr idx_t Shift(idx_t radix_bits) { return 48 - radix_bits; } static inline constexpr hash_t Mask(idx_t radix_bits) { return (hash_t(1 << radix_bits) - 1) << Shift(radix_bits); } //! 
Select using a cutoff on the radix bits of the hash static idx_t Select(Vector &hashes, const SelectionVector *sel, idx_t count, idx_t radix_bits, idx_t cutoff, SelectionVector *true_sel, SelectionVector *false_sel); //! Convert hashes to bins static void HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count); }; //! Templated radix partitioning constants, can be templated to the number of radix bits template struct RadixPartitioningConstants { public: //! Bitmask of the upper bits of the 5th byte static constexpr const idx_t NUM_PARTITIONS = RadixPartitioning::NumberOfPartitions(radix_bits); static constexpr const idx_t SHIFT = RadixPartitioning::Shift(radix_bits); static constexpr const hash_t MASK = RadixPartitioning::Mask(radix_bits); public: //! Apply bitmask and right shift to get a number between 0 and NUM_PARTITIONS static inline hash_t ApplyMask(hash_t hash) { D_ASSERT((hash & MASK) >> SHIFT < NUM_PARTITIONS); return (hash & MASK) >> SHIFT; } }; //! RadixPartitionedColumnData is a PartitionedColumnData that partitions input based on the radix of a hash class RadixPartitionedColumnData : public PartitionedColumnData { public: RadixPartitionedColumnData(ClientContext &context, vector types, idx_t radix_bits, idx_t hash_col_idx); RadixPartitionedColumnData(const RadixPartitionedColumnData &other); ~RadixPartitionedColumnData() override; idx_t GetRadixBits() const { return radix_bits; } protected: //===--------------------------------------------------------------------===// // Radix Partitioning interface implementation //===--------------------------------------------------------------------===// idx_t BufferSize() const override { switch (radix_bits) { case 1: case 2: case 3: case 4: return GetBufferSize(1 << 1); case 5: return GetBufferSize(1 << 2); case 6: return GetBufferSize(1 << 3); default: return GetBufferSize(1 << 4); } } void InitializeAppendStateInternal(PartitionedColumnDataAppendState &state) const override; void 
ComputePartitionIndices(PartitionedColumnDataAppendState &state, DataChunk &input) override;

	//! STANDARD_VECTOR_SIZE divided by div, but never less than 1
	static constexpr idx_t GetBufferSize(idx_t div) {
		return STANDARD_VECTOR_SIZE / div == 0 ? 1 : STANDARD_VECTOR_SIZE / div;
	}

private:
	//! The number of radix bits
	const idx_t radix_bits;
	//! The index of the column holding the hashes
	const idx_t hash_col_idx;
};

//! RadixPartitionedTupleData is a PartitionedTupleData that partitions input based on the radix of a hash
class RadixPartitionedTupleData : public PartitionedTupleData {
public:
	RadixPartitionedTupleData(BufferManager &buffer_manager, const TupleDataLayout &layout, idx_t radix_bits_p,
	                          idx_t hash_col_idx_p);
	RadixPartitionedTupleData(const RadixPartitionedTupleData &other);
	~RadixPartitionedTupleData() override;

	//! The number of radix bits this instance partitions on
	idx_t GetRadixBits() const {
		return radix_bits;
	}

private:
	void Initialize();

protected:
	//===--------------------------------------------------------------------===//
	// Radix Partitioning interface implementation
	//===--------------------------------------------------------------------===//
	void InitializeAppendStateInternal(PartitionedTupleDataAppendState &state,
	                                   TupleDataPinProperties properties) const override;
	void ComputePartitionIndices(PartitionedTupleDataAppendState &state, DataChunk &input) override;
	void ComputePartitionIndices(Vector &row_locations, idx_t count, Vector &partition_indices) const override;
	//! The highest partition index: NumberOfPartitions(radix_bits) - 1
	idx_t MaxPartitionIndex() const override {
		return RadixPartitioning::NumberOfPartitions(radix_bits) - 1;
	}

	//! Radix repartitioning iterates over the original partitions in reverse order
	bool RepartitionReverseOrder() const override {
		return true;
	}
	void RepartitionFinalizeStates(PartitionedTupleData &old_partitioned_data,
	                               PartitionedTupleData &new_partitioned_data, PartitionedTupleDataAppendState &state,
	                               idx_t finished_partition_idx) const override;

private:
	//! The number of radix bits
	const idx_t radix_bits;
//! The index of the column holding the hashes
	const idx_t hash_col_idx;
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/parallel/base_pipeline_event.hpp
//
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/parallel/event.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
class Executor;
class Task;

// NOTE(review): template argument lists appear to have been stripped from this copy of the header (e.g.
// `std::enable_shared_from_this`, `atomic`, `vector>` below) - restore them from upstream before compiling.

//! Abstract base class of schedulable events (Schedule is pure virtual)
class Event : public std::enable_shared_from_this {
public:
	Event(Executor &executor);
	virtual ~Event() = default;

public:
	//! Must be implemented by every concrete event
	virtual void Schedule() = 0;
	//! Called right after the event is finished
	virtual void FinishEvent() {
	}
	//! Called after the event is entirely finished
	virtual void FinalizeFinish() {
	}

	void FinishTask();
	void Finish();

	void AddDependency(Event &event);
	//! True if total_dependencies is non-zero
	bool HasDependencies() const {
		return total_dependencies != 0;
	}
	const vector &GetParentsVerification() const;

	void CompleteDependency();

	void SetTasks(vector> tasks);

	void InsertEvent(shared_ptr replacement_event);

	//! Returns the value of the finished flag
	bool IsFinished() const {
		return finished;
	}

	//! Hook for printing the pipeline; does nothing in the base class
	virtual void PrintPipeline() {
	}

protected:
	Executor &executor;
	//! The current threads working on the event
	// NOTE(review): the name suggests a count of finished tasks rather than of active threads - confirm.
	atomic finished_tasks;
	//! The maximum amount of threads that can work on the event
	atomic total_tasks;

	//! The amount of completed dependencies
	//! The event can only be started after the dependencies have finished executing
	atomic finished_dependencies;
	//! The total amount of dependencies
	idx_t total_dependencies;

	//! The events that depend on this event to run
	vector> parents;
	//! Raw pointers to the parents (used for verification only)
	vector parents_raw;

	//! Whether or not the event is finished executing
	atomic finished;
};

} // namespace duckdb

namespace duckdb {
//! A BasePipelineEvent is used as the basis of any event that belongs to a specific pipeline
class BasePipelineEvent : public Event {
public:
	explicit BasePipelineEvent(shared_ptr pipeline);
	explicit BasePipelineEvent(Pipeline &pipeline);

	//! Prints the pipeline this event belongs to
	void PrintPipeline() override {
		pipeline->Print();
	}

	//! The pipeline that this event belongs to
	shared_ptr pipeline;
};

} // namespace duckdb

namespace duckdb {

// NOTE(review): template argument lists appear to have been stripped from this copy of the header (every alias
// below reads `unique_ptr` / `vector` without an element type, and parameters read `vector>`) - restore them from
// upstream before compiling.

//! Sort state and partition layout of a single hash group
class PartitionGlobalHashGroup {
public:
	using GlobalSortStatePtr = unique_ptr;
	using LocalSortStatePtr = unique_ptr;
	using Orders = vector;
	using Types = vector;

	PartitionGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
	                         const Types &payload_types, bool external);

	int ComparePartitions(const SBIterator &left, const SBIterator &right) const;

	void ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask);

	GlobalSortStatePtr global_sort;
	atomic count;

	// Mask computation
	SortLayout partition_layout;
};

//! Global sink state shared by all threads while partitioning
class PartitionGlobalSinkState {
public:
	using HashGroupPtr = unique_ptr;
	using Orders = vector;
	using Types = vector;

	using GroupingPartition = unique_ptr;
	using GroupingAppend = unique_ptr;

	static void GenerateOrderings(Orders &partitions, Orders &orders, const vector> &partition_bys,
	                              const Orders &order_bys, const vector> &partitions_stats);

	PartitionGlobalSinkState(ClientContext &context, const vector> &partition_bys, const vector &order_bys,
	                         const Types &payload_types, const vector> &partitions_stats,
	                         idx_t estimated_cardinality);

	void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
	void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);

	void BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &global_sort);

	ClientContext &context;
	BufferManager &buffer_manager;
	Allocator &allocator;
	mutex lock;

	// OVER(PARTITION BY...) (hash grouping)
	unique_ptr grouping_data;
//! Payload plus hash column
	Types grouping_types;

	// OVER(...) (sorting)
	Orders partitions;
	Orders orders;
	const Types payload_types;
	// NOTE(review): element types of the `vector` / `unique_ptr` / `atomic` members in this block were stripped
	// from this copy of the header - restore them from upstream before compiling.
	vector hash_groups;
	bool external;
	// Reverse lookup from hash bins to non-empty hash groups
	vector bin_groups;

	// OVER() (no sorting)
	unique_ptr rows;
	unique_ptr strings;

	// Threading
	idx_t memory_per_thread;
	atomic count;

private:
	void ResizeGroupingData(idx_t cardinality);
	void SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
};

//! Per-thread sink state; holds a reference to the shared PartitionGlobalSinkState
class PartitionLocalSinkState {
public:
	PartitionLocalSinkState(ClientContext &context, PartitionGlobalSinkState &gstate_p);

	// Global state
	PartitionGlobalSinkState &gstate;
	Allocator &allocator;

	// OVER(PARTITION BY...) (hash grouping)
	ExpressionExecutor executor;
	DataChunk group_chunk;
	DataChunk payload_chunk;
	unique_ptr local_partition;
	unique_ptr local_append;

	// OVER(...) (sorting)
	size_t sort_cols;

	// OVER() (no sorting)
	RowLayout payload_layout;
	unique_ptr rows;
	unique_ptr strings;

	//! Compute the hash values
	void Hash(DataChunk &input_chunk, Vector &hash_vector);
	//! Sink an input chunk
	void Sink(DataChunk &input_chunk);
	//! Merge the state into the global state.
void Combine(); }; enum class PartitionSortStage : uint8_t { INIT, PREPARE, MERGE, SORTED }; class PartitionLocalMergeState; class PartitionGlobalMergeState { public: using GroupDataPtr = unique_ptr; PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin); bool IsSorted() const { lock_guard guard(lock); return stage == PartitionSortStage::SORTED; } bool AssignTask(PartitionLocalMergeState &local_state); bool TryPrepareNextStage(); void CompleteTask(); PartitionGlobalSinkState &sink; GroupDataPtr group_data; PartitionGlobalHashGroup *hash_group; GlobalSortState *global_sort; private: mutable mutex lock; PartitionSortStage stage; idx_t total_tasks; idx_t tasks_assigned; idx_t tasks_completed; }; class PartitionLocalMergeState { public: PartitionLocalMergeState() : merge_state(nullptr), stage(PartitionSortStage::INIT) { finished = true; } bool TaskFinished() { return finished; } void Prepare(); void Merge(); void ExecuteTask(); PartitionGlobalMergeState *merge_state; PartitionSortStage stage; atomic finished; }; class PartitionGlobalMergeStates { public: using PartitionGlobalMergeStatePtr = unique_ptr; explicit PartitionGlobalMergeStates(PartitionGlobalSinkState &sink); vector states; }; class PartitionMergeEvent : public BasePipelineEvent { public: PartitionMergeEvent(PartitionGlobalSinkState &gstate_p, Pipeline &pipeline_p) : BasePipelineEvent(pipeline_p), gstate(gstate_p), merge_states(gstate_p) { } PartitionGlobalSinkState &gstate; PartitionGlobalMergeStates merge_states; public: void Schedule() override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/set_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class SetType : uint8_t { SET = 0, RESET = 1 }; } // namespace duckdb //===----------------------------------------------------------------------===// // 
// DuckDB
//
// duckdb/common/enums/subquery_type.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//===--------------------------------------------------------------------===//
// Subquery Types
//===--------------------------------------------------------------------===//
enum class SubqueryType : uint8_t {
	INVALID = 0,
	SCALAR = 1,     // Regular scalar subquery
	EXISTS = 2,     // EXISTS (SELECT...)
	NOT_EXISTS = 3, // NOT EXISTS(SELECT...)
	ANY = 4,        // x = ANY(SELECT...) OR x IN (SELECT...)
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/common/enums/date_part_specifier.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! The date parts that can be named by a specifier string
enum class DatePartSpecifier : uint8_t {
	YEAR,
	MONTH,
	DAY,
	DECADE,
	CENTURY,
	MILLENNIUM,
	MICROSECONDS,
	MILLISECONDS,
	SECOND,
	MINUTE,
	HOUR,
	EPOCH,
	DOW,
	ISODOW,
	WEEK,
	ISOYEAR,
	QUARTER,
	DOY,
	YEARWEEK,
	ERA,
	TIMEZONE,
	TIMEZONE_HOUR,
	TIMEZONE_MINUTE
};

//! Looks up the specifier for a string; the bool result presumably reports whether it was recognised (the
//! definition is not visible here)
DUCKDB_API bool TryGetDatePartSpecifier(const string &specifier, DatePartSpecifier &result);
//! Returns the DatePartSpecifier for a string
DUCKDB_API DatePartSpecifier GetDatePartSpecifier(const string &specifier);

} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/common/enums/joinref_type.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//===--------------------------------------------------------------------===//
// Join Reference Types
//===--------------------------------------------------------------------===//
enum class JoinRefType : uint8_t {
	REGULAR,    // Explicit conditions
	NATURAL,    // Implied conditions
	CROSS,      // No condition
	POSITIONAL, // Positional condition
	ASOF        // AsOf conditions
};

//! String representation of a JoinRefType
const char *ToString(JoinRefType value);

} // namespace duckdb
//===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/set_operation_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class SetOperationType : uint8_t { NONE = 0, UNION = 1, EXCEPT = 2, INTERSECT = 3, UNION_BY_NAME = 4 }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/aggregate_handling.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===---- enum class AggregateHandling : uint8_t { STANDARD_HANDLING, // standard handling as in the SELECT clause NO_AGGREGATES_ALLOWED, // no aggregates allowed: any aggregates in this node will result in an error FORCE_AGGREGATES // force aggregates: any non-aggregate select list entry will become a GROUP }; const char *ToString(AggregateHandling value); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/index/art/art_node.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/index/art/fixed_size_allocator.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/index/art/swizzleable_pointer.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { // classes class MetaBlockReader; // structs struct BlockPointer; //! SwizzleablePointer provides functions on a (possibly) swizzled pointer. If the swizzle flag is set, then the //! 
pointer points to a storage address (and has no type), otherwise the pointer has a type and stores //! other information (e.g., a buffer location) class SwizzleablePointer { public: //! Constructs an empty SwizzleablePointer SwizzleablePointer() : swizzle_flag(0), type(0), offset(0), buffer_id(0) {}; //! Constructs a swizzled pointer from a buffer ID and an offset explicit SwizzleablePointer(MetaBlockReader &reader); //! Constructs a non-swizzled pointer from a buffer ID and an offset SwizzleablePointer(uint32_t offset, uint32_t buffer_id) : swizzle_flag(0), type(0), offset(offset), buffer_id(buffer_id) {}; //! The swizzle flag, set if swizzled, not set otherwise uint8_t swizzle_flag : 1; //! The type of the pointer, zero if not set uint8_t type : 7; //! The offset of a memory location uint32_t offset : 24; //! The buffer ID of a memory location uint32_t buffer_id : 32; public: //! Checks if the pointer is swizzled inline bool IsSwizzled() const { return swizzle_flag; } //! Returns true, if neither the swizzle flag nor the type is set, and false otherwise inline bool IsSet() const { return swizzle_flag || type; } //! Reset the pointer inline void Reset() { swizzle_flag = 0; type = 0; } }; } // namespace duckdb namespace duckdb { struct BufferEntry { BufferEntry(const data_ptr_t &ptr, const idx_t &allocation_count) : ptr(ptr), allocation_count(allocation_count) { } data_ptr_t ptr; idx_t allocation_count; }; //! The FixedSizeAllocator provides pointers to fixed-size sections of pre-allocated memory buffers. //! The pointers are SwizzleablePointers, and the leftmost byte (swizzle flag and type) must always be zero. class FixedSizeAllocator { public: //! Fixed size of the buffers static constexpr idx_t BUFFER_ALLOC_SIZE = Storage::BLOCK_ALLOC_SIZE; //! We can vacuum 10% or more of the total memory usage of the allocator static constexpr uint8_t VACUUM_THRESHOLD = 10; //! 
Constants for fast offset calculations in the bitmask static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1}; static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1}; public: explicit FixedSizeAllocator(const idx_t allocation_size, Allocator &allocator); ~FixedSizeAllocator(); //! Allocation size of one element in a buffer idx_t allocation_size; //! Total number of allocations idx_t total_allocations; //! Number of validity_t values in the bitmask idx_t bitmask_count; //! First starting byte of the payload idx_t allocation_offset; //! Number of possible allocations per buffer idx_t allocations_per_buffer; //! Buffers containing the data vector buffers; //! Buffers with free space unordered_set buffers_with_free_space; //! Minimum buffer ID of buffers that can be vacuumed idx_t min_vacuum_buffer_id; //! Buffer manager of the database instance Allocator &allocator; public: //! Get a new pointer to data, might cause a new buffer allocation SwizzleablePointer New(); //! Free the data of the pointer void Free(const SwizzleablePointer ptr); //! Get the data of the pointer template inline T *Get(const SwizzleablePointer ptr) const { return (T *)Get(ptr); } //! Resets the allocator, which e.g. becomes necessary during DELETE FROM table void Reset(); //! Returns the allocated memory size in bytes inline idx_t GetMemoryUsage() const { return buffers.size() * BUFFER_ALLOC_SIZE; } //! Merge another FixedSizeAllocator with this allocator. Both must have the same allocation size void Merge(FixedSizeAllocator &other); //! Initialize a vacuum operation, and return true, if the allocator needs a vacuum bool InitializeVacuum(); //! Finalize a vacuum operation by freeing all buffers exceeding the min_vacuum_buffer_id void FinalizeVacuum(); //! 
Returns true, if a pointer qualifies for a vacuum operation, and false otherwise inline bool NeedsVacuum(const SwizzleablePointer ptr) const { if (ptr.buffer_id >= min_vacuum_buffer_id) { return true; } return false; } //! Vacuums a pointer SwizzleablePointer VacuumPointer(const SwizzleablePointer ptr); //! Verify that the allocation counts match the existing positions on the buffers void Verify() const; private: //! Returns the data_ptr_t of a pointer inline data_ptr_t Get(const SwizzleablePointer ptr) const { D_ASSERT(ptr.buffer_id < buffers.size()); D_ASSERT(ptr.offset < allocations_per_buffer); return buffers[ptr.buffer_id].ptr + ptr.offset * allocation_size + allocation_offset; } //! Returns the first free offset in a bitmask uint32_t GetOffset(ValidityMask &mask, const idx_t allocation_count); }; } // namespace duckdb namespace duckdb { // classes enum class NType : uint8_t { PREFIX_SEGMENT = 1, LEAF_SEGMENT = 2, LEAF = 3, NODE_4 = 4, NODE_16 = 5, NODE_48 = 6, NODE_256 = 7 }; class ART; class Node; class Prefix; class MetaBlockReader; class MetaBlockWriter; // structs struct BlockPointer; struct ARTFlags; //! The ARTNode is the swizzleable pointer class of the ART index. //! If the ARTNode pointer is not swizzled, then the leftmost byte identifies the NType. //! The remaining bytes are the position in the respective ART buffer. class Node : public SwizzleablePointer { public: // constants (this allows testing performance with different ART node sizes) //! Node prefixes (NOTE: this should always hold: PREFIX_SEGMENT_SIZE >= PREFIX_INLINE_BYTES) static constexpr uint32_t PREFIX_INLINE_BYTES = 8; static constexpr uint32_t PREFIX_SEGMENT_SIZE = 32; //! Node thresholds static constexpr uint8_t NODE_48_SHRINK_THRESHOLD = 12; static constexpr uint8_t NODE_256_SHRINK_THRESHOLD = 36; //! 
Node sizes static constexpr uint8_t NODE_4_CAPACITY = 4; static constexpr uint8_t NODE_16_CAPACITY = 16; static constexpr uint8_t NODE_48_CAPACITY = 48; static constexpr uint16_t NODE_256_CAPACITY = 256; //! Other constants static constexpr uint8_t EMPTY_MARKER = 48; static constexpr uint32_t LEAF_SEGMENT_SIZE = 8; public: //! Constructs an empty ARTNode Node(); //! Constructs a swizzled pointer from a block ID and an offset explicit Node(MetaBlockReader &reader); //! Get a new pointer to a node, might cause a new buffer allocation, and initialize it static void New(ART &art, Node &node, const NType type); //! Free the node (and its subtree) static void Free(ART &art, Node &node); inline bool operator==(const Node &node) const { return swizzle_flag == node.swizzle_flag && type == node.type && offset == node.offset && buffer_id == node.buffer_id; } //! Retrieve the node type from the leftmost byte inline NType DecodeARTNodeType() const { D_ASSERT(!IsSwizzled()); D_ASSERT(type >= (uint8_t)NType::PREFIX_SEGMENT); D_ASSERT(type <= (uint8_t)NType::NODE_256); return NType(type); } //! Set the pointer inline void SetPtr(const SwizzleablePointer ptr) { swizzle_flag = ptr.swizzle_flag; type = ptr.type; offset = ptr.offset; buffer_id = ptr.buffer_id; } //! Replace the child node at the respective byte void ReplaceChild(const ART &art, const uint8_t byte, const Node child); //! Insert the child node at byte static void InsertChild(ART &art, Node &node, const uint8_t byte, const Node child); //! Delete the child node at the respective byte static void DeleteChild(ART &art, Node &node, const uint8_t byte); //! Get the child for the respective byte in the node optional_ptr GetChild(ART &art, const uint8_t byte) const; //! Get the first child that is greater or equal to the specific byte optional_ptr GetNextChild(ART &art, uint8_t &byte, const bool deserialize = true) const; //! Serialize the node BlockPointer Serialize(ART &art, MetaBlockWriter &writer); //! 
//! Deserialize the node
	void Deserialize(ART &art);

	//! Returns the string representation of the node, or only traverses and verifies the node and its subtree
	string VerifyAndToString(ART &art, const bool only_verify);
	//! Returns the capacity of the node
	idx_t GetCapacity() const;
	//! Returns a reference to the prefix of the node
	Prefix &GetPrefix(ART &art);
	//! Returns the matching node type for a given count
	static NType GetARTNodeTypeByCount(const idx_t count);
	//! Get references to the different allocators
	static FixedSizeAllocator &GetAllocator(const ART &art, NType type);

	//! Initializes a merge by fully deserializing the subtree of the node and incrementing its buffer IDs
	void InitializeMerge(ART &art, const ARTFlags &flags);
	//! Merge another node into this node
	bool Merge(ART &art, Node &other);
	//! Merge two nodes by first resolving their prefixes
	bool ResolvePrefixes(ART &art, Node &other);
	//! Merge two nodes that have no prefix or the same prefix
	bool MergeInternal(ART &art, Node &other);

	//! Vacuum all nodes that exceed their respective vacuum thresholds
	static void Vacuum(ART &art, Node &node, const ARTFlags &flags);
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/execution/operator/persistent/base_csv_reader.hpp
//
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/common/types/chunk_collection.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
class Allocator;
class ClientContext;

//! A ChunkCollection represents a set of DataChunks that all have the same
//! types
//!
//! A ChunkCollection represents a set of DataChunks concatenated together in a
//! list. Individual values of the collection can be iterated over using the
//! iterator.
//! It is also possible to iterate directly over the chunks for more direct access.
class ChunkCollection {
public:
	explicit ChunkCollection(Allocator &allocator);
	explicit ChunkCollection(ClientContext &context);

	// NOTE(review): element types of `vector` / `unique_ptr` appear to have been stripped from this copy of the
	// header - restore them from upstream before compiling.

	//! The types of columns in the ChunkCollection
	vector &Types() {
		return types;
	}
	const vector &Types() const {
		return types;
	}

	//! The amount of rows in the ChunkCollection (returned by const reference to the internal counter)
	const idx_t &Count() const {
		return count;
	}

	//! The amount of columns in the ChunkCollection
	idx_t ColumnCount() const {
		return types.size();
	}

	//! Append a new DataChunk directly to this ChunkCollection
	DUCKDB_API void Append(DataChunk &new_chunk);

	//! Append a new DataChunk directly to this ChunkCollection
	DUCKDB_API void Append(unique_ptr new_chunk);

	//! Append another ChunkCollection directly to this ChunkCollection
	DUCKDB_API void Append(ChunkCollection &other);

	//! Merge is like Append but messes up the order and destroys the other collection
	DUCKDB_API void Merge(ChunkCollection &other);

	//! Fuse adds new columns to the right of the collection
	DUCKDB_API void Fuse(ChunkCollection &other);

	DUCKDB_API void Verify();

	//! Gets the value of the column at the specified index
	DUCKDB_API Value GetValue(idx_t column, idx_t index);
	//! Sets the value of the column at the specified index
	DUCKDB_API void SetValue(idx_t column, idx_t index, const Value &value);

	//! Copy a single cell to a target vector
	DUCKDB_API void CopyCell(idx_t column, idx_t index, Vector &target, idx_t target_offset);

	DUCKDB_API string ToString() const;
	DUCKDB_API void Print() const;

	//! Gets a reference to the chunk that contains the given row (located via LocateChunk)
	DataChunk &GetChunkForRow(idx_t row_index) {
		return *chunks[LocateChunk(row_index)];
	}
Gets a reference to the chunk at the given index DataChunk &GetChunk(idx_t chunk_index) { D_ASSERT(chunk_index < chunks.size()); return *chunks[chunk_index]; } const DataChunk &GetChunk(idx_t chunk_index) const { D_ASSERT(chunk_index < chunks.size()); return *chunks[chunk_index]; } const vector> &Chunks() { return chunks; } idx_t ChunkCount() const { return chunks.size(); } void Reset() { count = 0; chunks.clear(); types.clear(); } unique_ptr Fetch() { if (ChunkCount() == 0) { return nullptr; } auto res = std::move(chunks[0]); chunks.erase(chunks.begin() + 0); return res; } //! Locates the chunk that belongs to the specific index idx_t LocateChunk(idx_t index) { idx_t result = index / STANDARD_VECTOR_SIZE; D_ASSERT(result < chunks.size()); return result; } Allocator &GetAllocator() { return allocator; } private: Allocator &allocator; //! The total amount of elements in the collection idx_t count; //! The set of data chunks in the collection vector> chunks; //! The types of the ChunkCollection vector types; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/operator/persistent/csv_reader_options.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/operator/persistent/csv_buffer.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/operator/persistent/csv_file_handle.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Allocator; class FileSystem; struct CSVFileHandle { public: CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr file_handle_p, const string &path_p, FileCompressionType compression, bool 
enable_reset = true);

	mutex main_mutex;

public:
	bool CanSeek();
	void Seek(idx_t position);
	idx_t SeekPosition();
	void Reset();
	bool OnDiskFile();

	idx_t FileSize();

	bool FinishedReading();

	idx_t Read(void *buffer, idx_t nr_bytes);

	string ReadLine();
	void DisableReset();

	// NOTE(review): the `unique_ptr` return/member types below lost their template argument in this copy of the
	// header - restore them from upstream before compiling.
	static unique_ptr OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
	                                 FileCompressionType compression);
	static unique_ptr OpenFile(FileSystem &fs, Allocator &allocator, const string &path,
	                           FileCompressionType compression, bool enable_reset);

private:
	FileSystem &fs;
	Allocator &allocator;
	unique_ptr file_handle;
	string path;
	FileCompressionType compression;
	//! Flags and size default to reset enabled, not seekable, not on disk, size 0
	bool reset_enabled = true;
	bool can_seek = false;
	bool on_disk_file = false;
	idx_t file_size = 0;
	// reset support
	AllocatedData cached_buffer;
	idx_t read_position = 0;
	idx_t buffer_size = 0;
	idx_t buffer_capacity = 0;
	idx_t requested_bytes = 0;
};

} // namespace duckdb

namespace duckdb {

class CSVBuffer {
public:
	//! Colossal buffer size for multi-threading
	static constexpr idx_t INITIAL_BUFFER_SIZE_COLOSSAL = 32000000; // 32MB

	//! Constructor for Initial Buffer
	CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle,
	          idx_t &global_csv_current_position, idx_t file_number);

	//! Constructor for `Next()` Buffers
	CSVBuffer(ClientContext &context, BufferHandle handle, idx_t buffer_size_p, idx_t actual_size_p,
	          bool final_buffer, idx_t global_csv_current_position, idx_t file_number);

	//! Creates a new buffer with the next part of the CSV File
	unique_ptr Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t &global_csv_current_position,
	                idx_t file_number);

	//! Gets the buffer actual size
	idx_t GetBufferSize();

	//! Gets the start position of the buffer, only relevant for the first time it's scanned
	idx_t GetStart();

	//! If this buffer is the last buffer of the CSV File
	bool IsCSVFileLastBuffer();
// NOTE(review): line breaks and the contents of every `<...>` are missing from this copy (e.g. `vector> child_filters`,
// `static unique_ptr Deserialize`), so each physical line is parsed as comment text from its first `//` onward. The
// two lines below are kept byte-for-byte; regenerate from the upstream header rather than repairing by hand.
//
// What the two lines declare:
//  - Remainder of class CSVBuffer (opened on the preceding source line): IsCSVFileFirstBuffer, GetCSVGlobalStart,
//    GetFileNumber and AllocateBuffer; Ptr() returns char_ptr_cast(handle.Ptr()). Private state is context, handle and
//    actual_size, plus start_position, last_buffer, first_buffer, global_csv_start and file_number, which carry
//    in-class initialisers of 0 / false.
//  - class ConjunctionFilter : public TableFilter. Holds child_filters; CheckStatistics and ToString are pure
//    virtual; Equals forwards to TableFilter::Equals.
//  - class ConjunctionOrFilter / class ConjunctionAndFilter. TYPE is TableFilterType::CONJUNCTION_OR /
//    TableFilterType::CONJUNCTION_AND; both override CheckStatistics, ToString, Equals and Serialize and declare a
//    static Deserialize(FieldReader &).
//  - Opening of class ConstantFilter (continues on the following source line): TYPE is
//    TableFilterType::CONSTANT_COMPARISON; the constructor takes an ExpressionType and a Value.
// NOTE(review): ConjunctionFilter's single-argument constructor is not `explicit` - confirm that implicit conversion
// from TableFilterType is intended.
If this buffer is the first buffer of the CSV File bool IsCSVFileFirstBuffer(); idx_t GetCSVGlobalStart(); idx_t GetFileNumber(); BufferHandle AllocateBuffer(idx_t buffer_size); char *Ptr() { return char_ptr_cast(handle.Ptr()); } private: ClientContext &context; BufferHandle handle; //! Actual size can be smaller than the buffer size in case we allocate it too optimistically. idx_t actual_size; //! We need to check for Byte Order Mark, to define the start position of this buffer //! https://en.wikipedia.org/wiki/Byte_order_mark#UTF-8 idx_t start_position = 0; //! If this is the last buffer of the CSV File bool last_buffer = false; //! If this is the first buffer of the CSV File bool first_buffer = false; //! Global position from the CSV File where this buffer starts idx_t global_csv_start = 0; //! Number of the file that is in this buffer idx_t file_number = 0; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/multi_file_reader_options.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/hive_partitioning.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/optimizer/filter_combiner.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/filter/conjunction_filter.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class ConjunctionFilter : public TableFilter { public: ConjunctionFilter(TableFilterType filter_type_p) : TableFilter(filter_type_p) { } virtual ~ConjunctionFilter() { } //! 
The filters of this conjunction vector> child_filters; public: virtual FilterPropagateResult CheckStatistics(BaseStatistics &stats) = 0; virtual string ToString(const string &column_name) = 0; virtual bool Equals(const TableFilter &other) const { return TableFilter::Equals(other); } }; class ConjunctionOrFilter : public ConjunctionFilter { public: static constexpr const TableFilterType TYPE = TableFilterType::CONJUNCTION_OR; public: ConjunctionOrFilter(); public: FilterPropagateResult CheckStatistics(BaseStatistics &stats) override; string ToString(const string &column_name) override; bool Equals(const TableFilter &other) const override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(FieldReader &source); }; class ConjunctionAndFilter : public ConjunctionFilter { public: static constexpr const TableFilterType TYPE = TableFilterType::CONJUNCTION_AND; public: ConjunctionAndFilter(); public: FilterPropagateResult CheckStatistics(BaseStatistics &stats) override; string ToString(const string &column_name) override; bool Equals(const TableFilter &other) const override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(FieldReader &source); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/filter/constant_filter.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class ConstantFilter : public TableFilter { public: static constexpr const TableFilterType TYPE = TableFilterType::CONSTANT_COMPARISON; public: ConstantFilter(ExpressionType comparison_type, Value constant); //! The comparison type (e.g. COMPARE_EQUAL, COMPARE_GREATERTHAN, COMPARE_LESSTHAN, ...) ExpressionType comparison_type; //! 
// NOTE(review): line breaks and the contents of every `<...>` are missing from this copy (e.g. `vector> remaining_filters`,
// `expression_map_t equivalence_set_map`, the bare `#include #include`), so each physical line is parsed as comment
// text from its first `//` onward. The three lines below are kept byte-for-byte; regenerate from the upstream header
// rather than repairing by hand.
// NOTE(review): `¬_constants` in the commented-out FindZonemapChecks signature looks like a mis-decoded
// `&not_constants` - confirm against the upstream source.
//
// What the three lines declare:
//  - Remainder of class ConstantFilter (opened on the preceding source line): the `constant` member, the
//    CheckStatistics/ToString/Equals/Serialize overrides and a static Deserialize(FieldReader &).
//  - enum class ValueComparisonResult { PRUNE_LEFT, PRUNE_RIGHT, UNSATISFIABLE_CONDITION, PRUNE_NOTHING } and
//    enum class FilterResult { UNSATISFIABLE, SUCCESS, UNSUPPORTED }.
//  - class FilterCombiner. Public surface: two explicit constructors (from ClientContext & or Optimizer &), the
//    nested ExpressionValueInformation { constant, comparison_type }, AddFilter, GenerateFilters, HasFilters and
//    GenerateTableScanFilters. Private helpers: AddFilter(Expression &), AddBoundComparisonFilter,
//    AddTransitiveFilters, FindTransitiveFilter, GetNode, GetEquivalenceSet and AddConstantComparison. State:
//    remaining_filters, stored_expressions, equivalence_set_map, constant_values, equivalence_map and set_index
//    (initialised to 0). The zonemap-check and OR-filter helpers and their data structures appear only as
//    commented-out code.
//  - struct ColumnBindingHashFunction returns CombineHash(Hash(a.table_index), Hash(a.column_index));
//    struct ColumnBindingEquality compares with ==; column_binding_map_t / column_binding_set_t are aliases built on
//    unordered_map / unordered_set.
//  - Opening of class StatisticsPropagator (continues on the following source lines): an explicit constructor taking
//    ClientContext & and the public PropagateStatistics entry point.
The constant value to filter on Value constant; public: FilterPropagateResult CheckStatistics(BaseStatistics &stats) override; string ToString(const string &column_name) override; bool Equals(const TableFilter &other) const override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(FieldReader &source); }; } // namespace duckdb #include #include namespace duckdb { class Optimizer; enum class ValueComparisonResult { PRUNE_LEFT, PRUNE_RIGHT, UNSATISFIABLE_CONDITION, PRUNE_NOTHING }; enum class FilterResult { UNSATISFIABLE, SUCCESS, UNSUPPORTED }; //! The FilterCombiner combines several filters and generates a logically equivalent set that is more efficient //! Amongst others: //! (1) it prunes obsolete filter conditions: i.e. [X > 5 and X > 7] => [X > 7] //! (2) it generates new filters for expressions in the same equivalence set: i.e. [X = Y and X = 500] => [Y = 500] //! (3) it prunes branches that have unsatisfiable filters: i.e. [X = 5 AND X > 6] => FALSE, prune branch class FilterCombiner { public: explicit FilterCombiner(ClientContext &context); explicit FilterCombiner(Optimizer &optimizer); ClientContext &context; public: struct ExpressionValueInformation { Value constant; ExpressionType comparison_type; }; FilterResult AddFilter(unique_ptr expr); void GenerateFilters(const std::function filter)> &callback); bool HasFilters(); TableFilterSet GenerateTableScanFilters(vector &column_ids); // vector> GenerateZonemapChecks(vector &column_ids, vector> // &pushed_filters); private: FilterResult AddFilter(Expression &expr); FilterResult AddBoundComparisonFilter(Expression &expr); FilterResult AddTransitiveFilters(BoundComparisonExpression &comparison); unique_ptr FindTransitiveFilter(Expression &expr); // unordered_map> // FindZonemapChecks(vector &column_ids, unordered_set ¬_constants, Expression *filter); Expression &GetNode(Expression &expr); idx_t GetEquivalenceSet(Expression &expr); FilterResult AddConstantComparison(vector 
&info_list, ExpressionValueInformation info); // // //! Functions used to push and generate OR Filters // void LookUpConjunctions(Expression *expr); // bool BFSLookUpConjunctions(BoundConjunctionExpression *conjunction); // void VerifyOrsToPush(Expression &expr); // // bool UpdateConjunctionFilter(BoundComparisonExpression *comparison_expr); // bool UpdateFilterByColumn(BoundColumnRefExpression *column_ref, BoundComparisonExpression *comparison_expr); // void GenerateORFilters(TableFilterSet &table_filter, vector &column_ids); // // template // void GenerateConjunctionFilter(BoundConjunctionExpression *conjunction, ConjunctionFilter *last_conj_filter) { // auto new_filter = NextConjunctionFilter(conjunction); // auto conj_filter_ptr = (ConjunctionFilter *)new_filter.get(); // last_conj_filter->child_filters.push_back(std::move(new_filter)); // last_conj_filter = conj_filter_ptr; // } // // template // unique_ptr NextConjunctionFilter(BoundConjunctionExpression *conjunction) { // unique_ptr conj_filter = make_uniq(); // for (auto &expr : conjunction->children) { // auto comp_expr = (BoundComparisonExpression *)expr.get(); // auto &const_expr = // (comp_expr->left->type == ExpressionType::VALUE_CONSTANT) ? *comp_expr->left : *comp_expr->right; // auto const_value = ExpressionExecutor::EvaluateScalar(const_expr); // auto const_filter = make_uniq(comp_expr->type, const_value); // conj_filter->child_filters.push_back(std::move(const_filter)); // } // return std::move(conj_filter); // } private: vector> remaining_filters; expression_map_t> stored_expressions; expression_map_t equivalence_set_map; unordered_map> constant_values; unordered_map>> equivalence_map; idx_t set_index = 0; // // //! 
Structures used for OR Filters // // struct ConjunctionsToPush { // BoundConjunctionExpression *root_or; // // // only preserve AND if there is a single column in the expression // bool preserve_and = true; // // // conjunction chain for this column // vector> conjunctions; // }; // // expression_map_t>> map_col_conjunctions; // vector vec_colref_insertion_order; // // BoundConjunctionExpression *cur_root_or; // BoundConjunctionExpression *cur_conjunction; // // BoundColumnRefExpression *cur_colref_to_push; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/optimizer/statistics_propagator.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/column_binding_map.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct ColumnBindingHashFunction { uint64_t operator()(const ColumnBinding &a) const { return CombineHash(Hash(a.table_index), Hash(a.column_index)); } }; struct ColumnBindingEquality { bool operator()(const ColumnBinding &a, const ColumnBinding &b) const { return a == b; } }; template using column_binding_map_t = unordered_map; using column_binding_set_t = unordered_set; } // namespace duckdb namespace duckdb { class ClientContext; class LogicalOperator; class TableFilter; struct BoundOrderByNode; class StatisticsPropagator { public: explicit StatisticsPropagator(ClientContext &context); unique_ptr PropagateStatistics(unique_ptr &node_ptr); private: //! 
// NOTE(review): line breaks and the contents of every `<...>` are missing from this copy (every `unique_ptr` below has
// lost its element type, and the `std::function` callbacks have lost their signatures), so each physical line is
// parsed as comment text from its first `//` onward. The three lines below are kept byte-for-byte; regenerate from
// the upstream header rather than repairing by hand.
//
// What the three lines declare:
//  - Remainder of class StatisticsPropagator (opened on the preceding source line), all private:
//      * PropagateStatistics overloads, one per operator type (LogicalOperator, LogicalFilter, LogicalGet,
//        LogicalJoin, LogicalPositionalJoin, LogicalProjection, LogicalSetOperation, LogicalAggregate,
//        LogicalCrossProduct, LogicalLimit, LogicalOrder, LogicalWindow); the LogicalComparisonJoin and
//        LogicalAnyJoin overloads return void. PropagateChildren takes the same (node, node_ptr) pair.
//      * StatisticsFromValue, PropagateComparison, four UpdateFilterStatistics overloads plus one taking a
//        TableFilter, SetStatisticsNotNull, PropagateTableFilter, AddCardinalities and MultiplyCardinalities.
//      * PropagateExpression overloads, one per bound-expression type (aggregate, between, case, cast, conjunction,
//        function, comparison, constant, column ref, operator), plus PropagateAndCompress, ReplaceWithEmptyResult,
//        ExpressionIsConstant and ExpressionIsConstantOrNull.
//      * State: context, statistics_map (commented as ColumnBinding -> statistics) and node_stats.
//  - class ExpressionIterator: static members only - three EnumerateChildren overloads, EnumerateExpression,
//    EnumerateTableRefChildren and EnumerateQueryNodeChildren, each taking a std::function callback.
//  - Start of the vendored RE2 header (license notice, include guard RE2_RE2_H_) and its usage documentation, which
//    continues on the following source lines.
Propagate statistics through an operator unique_ptr PropagateStatistics(LogicalOperator &node, unique_ptr *node_ptr); unique_ptr PropagateStatistics(LogicalFilter &op, unique_ptr *node_ptr); unique_ptr PropagateStatistics(LogicalGet &op, unique_ptr *node_ptr); unique_ptr PropagateStatistics(LogicalJoin &op, unique_ptr *node_ptr); unique_ptr PropagateStatistics(LogicalPositionalJoin &op, unique_ptr *node_ptr); unique_ptr PropagateStatistics(LogicalProjection &op, unique_ptr *node_ptr); void PropagateStatistics(LogicalComparisonJoin &op, unique_ptr *node_ptr); void PropagateStatistics(LogicalAnyJoin &op, unique_ptr *node_ptr); unique_ptr PropagateStatistics(LogicalSetOperation &op, unique_ptr *node_ptr); unique_ptr PropagateStatistics(LogicalAggregate &op, unique_ptr *node_ptr); unique_ptr PropagateStatistics(LogicalCrossProduct &op, unique_ptr *node_ptr); unique_ptr PropagateStatistics(LogicalLimit &op, unique_ptr *node_ptr); unique_ptr PropagateStatistics(LogicalOrder &op, unique_ptr *node_ptr); unique_ptr PropagateStatistics(LogicalWindow &op, unique_ptr *node_ptr); unique_ptr PropagateChildren(LogicalOperator &node, unique_ptr *node_ptr); //! Return statistics from a constant value unique_ptr StatisticsFromValue(const Value &input); //! Run a comparison with two sets of statistics, returns if the comparison will always returns true/false or not FilterPropagateResult PropagateComparison(BaseStatistics &left, BaseStatistics &right, ExpressionType comparison); //! Update filter statistics from a filter with a constant void UpdateFilterStatistics(BaseStatistics &input, ExpressionType comparison_type, const Value &constant); //! Update statistics from a filter between two stats void UpdateFilterStatistics(BaseStatistics &lstats, BaseStatistics &rstats, ExpressionType comparison_type); //! Update filter statistics from a generic comparison void UpdateFilterStatistics(Expression &left, Expression &right, ExpressionType comparison_type); //! 
Update filter statistics from an expression void UpdateFilterStatistics(Expression &condition); //! Set the statistics of a specific column binding to not contain null values void SetStatisticsNotNull(ColumnBinding binding); //! Run a comparison between the statistics and the table filter; returns the prune result FilterPropagateResult PropagateTableFilter(BaseStatistics &stats, TableFilter &filter); //! Update filter statistics from a TableFilter void UpdateFilterStatistics(BaseStatistics &input, TableFilter &filter); //! Add cardinalities together (i.e. new max is stats.max + new_stats.max): used for union void AddCardinalities(unique_ptr &stats, NodeStatistics &new_stats); //! Multiply the cardinalities together (i.e. new max cardinality is stats.max * new_stats.max): used for //! joins/cross products void MultiplyCardinalities(unique_ptr &stats, NodeStatistics &new_stats); unique_ptr PropagateExpression(unique_ptr &expr); unique_ptr PropagateExpression(Expression &expr, unique_ptr *expr_ptr); unique_ptr PropagateExpression(BoundAggregateExpression &expr, unique_ptr *expr_ptr); unique_ptr PropagateExpression(BoundBetweenExpression &expr, unique_ptr *expr_ptr); unique_ptr PropagateExpression(BoundCaseExpression &expr, unique_ptr *expr_ptr); unique_ptr PropagateExpression(BoundCastExpression &expr, unique_ptr *expr_ptr); unique_ptr PropagateExpression(BoundConjunctionExpression &expr, unique_ptr *expr_ptr); unique_ptr PropagateExpression(BoundFunctionExpression &expr, unique_ptr *expr_ptr); unique_ptr PropagateExpression(BoundComparisonExpression &expr, unique_ptr *expr_ptr); unique_ptr PropagateExpression(BoundConstantExpression &expr, unique_ptr *expr_ptr); unique_ptr PropagateExpression(BoundColumnRefExpression &expr, unique_ptr *expr_ptr); unique_ptr PropagateExpression(BoundOperatorExpression &expr, unique_ptr *expr_ptr); void PropagateAndCompress(unique_ptr &expr, unique_ptr &stats); void ReplaceWithEmptyResult(unique_ptr &node); bool 
ExpressionIsConstant(Expression &expr, const Value &val); bool ExpressionIsConstantOrNull(Expression &expr, const Value &val); private: ClientContext &context; //! The map of ColumnBinding -> statistics for the various nodes column_binding_map_t> statistics_map; //! Node stats for the current node unique_ptr node_stats; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/expression_iterator.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { class BoundQueryNode; class BoundTableRef; class ExpressionIterator { public: static void EnumerateChildren(const Expression &expression, const std::function &callback); static void EnumerateChildren(Expression &expression, const std::function &callback); static void EnumerateChildren(Expression &expression, const std::function &child)> &callback); static void EnumerateExpression(unique_ptr &expr, const std::function &callback); static void EnumerateTableRefChildren(BoundTableRef &ref, const std::function &callback); static void EnumerateQueryNodeChildren(BoundQueryNode &node, const std::function &callback); }; } // namespace duckdb // LICENSE_CHANGE_BEGIN // The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list // Copyright 2003-2009 The RE2 Authors. All Rights Reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #ifndef RE2_RE2_H_ #define RE2_RE2_H_ // C++ interface to the re2 regular-expression library. // RE2 supports Perl-style regular expressions (with extensions like // \d, \w, \s, ...). 
// // ----------------------------------------------------------------------- // REGEXP SYNTAX: // // This module uses the re2 library and hence supports // its syntax for regular expressions, which is similar to Perl's with // some of the more complicated things thrown away. In particular, // backreferences and generalized assertions are not available, nor is \Z. // // See https://github.com/google/re2/wiki/Syntax for the syntax // supported by RE2, and a comparison with PCRE and PERL regexps. // // For those not familiar with Perl's regular expressions, // here are some examples of the most commonly used extensions: // // "hello (\\w+) world" -- \w matches a "word" character // "version (\\d+)" -- \d matches a digit // "hello\\s+world" -- \s matches any whitespace character // "\\b(\\w+)\\b" -- \b matches non-empty string at word boundary // "(?i)hello" -- (?i) turns on case-insensitive matching // "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible // // ----------------------------------------------------------------------- // MATCHING INTERFACE: // // The "FullMatch" operation checks that supplied text matches a // supplied pattern exactly. // // Example: successful match // CHECK(RE2::FullMatch("hello", "h.*o")); // // Example: unsuccessful match (requires full match): // CHECK(!RE2::FullMatch("hello", "e")); // // ----------------------------------------------------------------------- // UTF-8 AND THE MATCHING INTERFACE: // // By default, the pattern and input text are interpreted as UTF-8. // The RE2::Latin1 option causes them to be interpreted as Latin-1. // // Example: // CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern))); // CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1))); // // ----------------------------------------------------------------------- // MATCHING WITH SUBSTRING EXTRACTION: // // You can supply extra pointer arguments to extract matched substrings. 
// On match failure, none of the pointees will have been modified. // On match success, the substrings will be converted (as necessary) and // their values will be assigned to their pointees until all conversions // have succeeded or one conversion has failed. // On conversion failure, the pointees will be in an indeterminate state // because the caller has no way of knowing which conversion failed. // However, conversion cannot fail for types like string and StringPiece // that do not inspect the substring contents. Hence, in the common case // where all of the pointees are of such types, failure is always due to // match failure and thus none of the pointees will have been modified. // // Example: extracts "ruby" into "s" and 1234 into "i" // int i; // std::string s; // CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); // // Example: fails because string cannot be stored in integer // CHECK(!RE2::FullMatch("ruby", "(.*)", &i)); // // Example: fails because there aren't enough sub-patterns // CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s)); // // Example: does not try to extract any extra sub-patterns // CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s)); // // Example: does not try to extract into NULL // CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i)); // // Example: integer overflow causes failure // CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i)); // // NOTE(rsc): Asking for substrings slows successful matches quite a bit. // This may get a little faster in the future, but right now is slower // than PCRE. On the other hand, failed matches run *very* fast (faster // than PCRE), as do matches without substring extraction. // // ----------------------------------------------------------------------- // PARTIAL MATCHES // // You can use the "PartialMatch" operation when you want the pattern // to match any substring of the text. 
// // Example: simple search for a string: // CHECK(RE2::PartialMatch("hello", "ell")); // // Example: find first number in a string // int number; // CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number)); // CHECK_EQ(number, 100); // // ----------------------------------------------------------------------- // PRE-COMPILED REGULAR EXPRESSIONS // // RE2 makes it easy to use any string as a regular expression, without // requiring a separate compilation step. // // If speed is of the essence, you can create a pre-compiled "RE2" // object from the pattern and use it multiple times. If you do so, // you can typically parse text faster than with sscanf. // // Example: precompile pattern for faster matching: // RE2 pattern("h.*o"); // while (ReadLine(&str)) { // if (RE2::FullMatch(str, pattern)) ...; // } // // ----------------------------------------------------------------------- // SCANNING TEXT INCREMENTALLY // // The "Consume" operation may be useful if you want to repeatedly // match regular expressions at the front of a string and skip over // them as they match. This requires use of the "StringPiece" type, // which represents a sub-range of a real string. // // Example: read lines of the form "var = value" from a string. // std::string contents = ...; // Fill string somehow // StringPiece input(contents); // Wrap a StringPiece around it // // std::string var; // int value; // while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) { // ...; // } // // Each successful call to "Consume" will set "var/value", and also // advance "input" so it points past the matched text. Note that if the // regular expression matches an empty string, input will advance // by 0 bytes. If the regular expression being used might match // an empty string, the loop body must check for this case and either // advance the string or break out of the loop. 
// // The "FindAndConsume" operation is similar to "Consume" but does not // anchor your match at the beginning of the string. For example, you // could extract all words from a string by repeatedly calling // RE2::FindAndConsume(&input, "(\\w+)", &word) // // ----------------------------------------------------------------------- // USING VARIABLE NUMBER OF ARGUMENTS // // The above operations require you to know the number of arguments // when you write the code. This is not always possible or easy (for // example, the regular expression may be calculated at run time). // You can use the "N" version of the operations when the number of // match arguments is determined at run time. // // Example: // const RE2::Arg* args[10]; // int n; // // ... populate args with pointers to RE2::Arg values ... // // ... set n to the number of RE2::Arg objects ... // bool match = RE2::FullMatchN(input, pattern, args, n); // // The last statement is equivalent to // // bool match = RE2::FullMatch(input, pattern, // *args[0], *args[1], ..., *args[n - 1]); // // ----------------------------------------------------------------------- // PARSING HEX/OCTAL/C-RADIX NUMBERS // // By default, if you pass a pointer to a numeric value, the // corresponding text is interpreted as a base-10 number. You can // instead wrap the pointer with a call to one of the operators Hex(), // Octal(), or CRadix() to interpret the text in another base. The // CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) // prefixes, but defaults to base-10. // // Example: // int a, b, c, d; // CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)", // RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d))); // will leave 64 in a, b, c, and d. #include #include #include #include #include #include // LICENSE_CHANGE_BEGIN // The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 // See the end of this file for a list // Copyright 2001-2010 The RE2 Authors. 
// All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// NOTE(review): in this copy the line breaks and the contents of every `<...>` in this section (include targets,
// template arguments) were missing, which left the whole section swallowed by `//` comments. They are restored here
// to match upstream RE2's stringpiece.h - confirm against the vendored original.

#ifndef RE2_STRINGPIECE_H_
#define RE2_STRINGPIECE_H_

#ifdef min
#undef min
#endif

// A string-like object that points to a sized piece of memory.
//
// Functions or methods may use const StringPiece& parameters to accept either
// a "const char*" or a "string" value that will be implicitly converted to
// a StringPiece. The implicit conversion means that it is often appropriate
// to include this .h file in other files rather than forward-declaring
// StringPiece as would be appropriate for most other Google classes.
//
// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
// conversions from "const char*" to "string" and back again.
//
//
// Arghh! I wish C++ literals were "string".

// Doing this simplifies the logic below.
#ifndef __has_include
#define __has_include(x) 0
#endif

#include <stddef.h>
#include <string.h>
#include <algorithm>
#include <iosfwd>
#include <iterator>
#include <string>
#if __has_include(<string_view>) && __cplusplus >= 201703L
#include <string_view>
#endif

namespace duckdb_re2 {

// Non-owning (pointer, length) view of a character sequence. The class only stores `data_` and `size_`; it never
// allocates or frees, so the referenced bytes must outlive the StringPiece.
class StringPiece {
public:
	typedef std::char_traits<char> traits_type;
	typedef char value_type;
	typedef char* pointer;
	typedef const char* const_pointer;
	typedef char& reference;
	typedef const char& const_reference;
	typedef const char* const_iterator;
	typedef const_iterator iterator;
	typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
	typedef const_reverse_iterator reverse_iterator;
	typedef size_t size_type;
	typedef ptrdiff_t difference_type;
	static const size_type npos = static_cast<size_type>(-1);

	// We provide non-explicit singleton constructors so users can pass
	// in a "const char*" or a "string" wherever a "StringPiece" is
	// expected.
	StringPiece() : data_(NULL), size_(0) {}
#if __has_include(<string_view>) && __cplusplus >= 201703L
	StringPiece(const std::string_view& str) : data_(str.data()), size_(str.size()) {}
#endif
	StringPiece(const std::string& str) : data_(str.data()), size_(str.size()) {}
	// A NULL `str` produces an empty piece instead of calling strlen(NULL).
	StringPiece(const char* str) : data_(str), size_(str == NULL ? 0 : strlen(str)) {}
	StringPiece(const char* str, size_type len) : data_(str), size_(len) {}

	const_iterator begin() const { return data_; }
	const_iterator end() const { return data_ + size_; }
	const_reverse_iterator rbegin() const { return const_reverse_iterator(data_ + size_); }
	const_reverse_iterator rend() const { return const_reverse_iterator(data_); }

	size_type size() const { return size_; }
	size_type length() const { return size_; }
	bool empty() const { return size_ == 0; }

	// No bounds checking: `i` must be < size().
	const_reference operator[](size_type i) const { return data_[i]; }
	const_pointer data() const { return data_; }

	// No bounds checking: `n` must be <= size(), otherwise size_ wraps around.
	void remove_prefix(size_type n) {
		data_ += n;
		size_ -= n;
	}

	// No bounds checking: `n` must be <= size(), otherwise size_ wraps around.
	void remove_suffix(size_type n) { size_ -= n; }

	void set(const char* str) {
		data_ = str;
		size_ = str == NULL ? 0 : strlen(str);
	}

	void set(const char* str, size_type len) {
		data_ = str;
		size_ = len;
	}

	// Converts to `std::basic_string`.
	template <typename A>
	explicit operator std::basic_string<char, traits_type, A>() const {
		if (!data_) return {};
		return std::basic_string<char, traits_type, A>(data_, size_);
	}

	// Returns a copy of the referenced bytes. A piece whose data pointer is NULL (e.g. default-constructed) yields
	// an empty string, mirroring the guard in the conversion operator above, instead of handing NULL to std::string.
	std::string as_string() const {
		if (!data_) return std::string();
		return std::string(data_, size_);
	}

	// We also define ToString() here, since many other string-like
	// interfaces name the routine that converts to a C++ string
	// "ToString", and it's confusing to have the method that does that
	// for a StringPiece be called "as_string()". We also leave the
	// "as_string()" method defined here for existing code.
	std::string ToString() const { return as_string(); }

	// Replaces *target with the referenced bytes; a NULL-data piece clears *target.
	void CopyToString(std::string* target) const {
		if (!data_) {
			target->clear();
			return;
		}
		target->assign(data_, size_);
	}

	// Appends the referenced bytes to *target; a NULL-data piece appends nothing.
	void AppendToString(std::string* target) const {
		if (data_) target->append(data_, size_);
	}

	// Declared here, defined out of line.
	size_type copy(char* buf, size_type n, size_type pos = 0) const;
	StringPiece substr(size_type pos = 0, size_type n = npos) const;

	// Three-way byte-wise comparison: returns -1, 0 or 1. The shared prefix is compared with memcmp; on a tie the
	// shorter piece orders first.
	int compare(const StringPiece& x) const {
		size_type min_size = std::min(size(), x.size());
		if (min_size > 0) {
			int r = memcmp(data(), x.data(), min_size);
			if (r < 0) return -1;
			if (r > 0) return 1;
		}
		if (size() < x.size()) return -1;
		if (size() > x.size()) return 1;
		return 0;
	}

	// Does "this" start with "x"?
	bool starts_with(const StringPiece& x) const {
		return x.empty() || (size() >= x.size() && memcmp(data(), x.data(), x.size()) == 0);
	}

	// Does "this" end with "x"?
	bool ends_with(const StringPiece& x) const {
		return x.empty() || (size() >= x.size() && memcmp(data() + (size() - x.size()), x.data(), x.size()) == 0);
	}

	bool contains(const StringPiece& s) const { return find(s) != npos; }

	// Declared here, defined out of line.
	size_type find(const StringPiece& s, size_type pos = 0) const;
	size_type find(char c, size_type pos = 0) const;
	size_type rfind(const StringPiece& s, size_type pos = npos) const;
	size_type rfind(char c, size_type pos = npos) const;

private:
	const_pointer data_;
	size_type size_;
};

// Equal when the lengths match and either both pieces alias the same bytes, both are empty, or the bytes compare
// equal; the first two checks avoid calling memcmp on NULL pointers.
inline bool operator==(const StringPiece& x, const StringPiece& y) {
	StringPiece::size_type len = x.size();
	if (len != y.size()) return false;
	return x.data() == y.data() || len == 0 || memcmp(x.data(), y.data(), len) == 0;
}

inline bool operator!=(const StringPiece& x, const StringPiece& y) { return !(x == y); }

// Lexicographic byte order; when one piece is a prefix of the other, the shorter one orders first.
inline bool operator<(const StringPiece& x, const StringPiece& y) {
	StringPiece::size_type min_size = std::min(x.size(), y.size());
	int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size);
	return (r < 0) || (r == 0 && x.size() < y.size());
}

inline bool operator>(const StringPiece& x, const StringPiece& y) { return y < x; }

inline bool operator<=(const StringPiece& x, const StringPiece& y) { return !(x > y); }

inline bool operator>=(const StringPiece& x, const StringPiece& y) { return !(x < y); }

// Allow StringPiece to be logged.
std::ostream& operator<<(std::ostream& o, const StringPiece& p);

} // namespace duckdb_re2

#endif // RE2_STRINGPIECE_H_

// LICENSE_CHANGE_END

// NOTE(review): the text after the closing brace below opens class RE2, whose definition continues beyond this
// section. It is reproduced byte-for-byte on a single line, exactly as it appeared, and therefore still reads as
// comment text.
namespace duckdb_re2 {
class Prog;
class Regexp;
} // namespace duckdb_re2 namespace duckdb_re2 { // Interface for regular expression matching. Also corresponds to a // pre-compiled regular expression. An "RE2" object is safe for // concurrent use by multiple threads. class RE2 { public: // We convert user-passed pointers into special Arg objects class Arg; class Options; // Defined in set.h. class Set; enum ErrorCode { NoError = 0, // Unexpected error ErrorInternal, // Parse errors ErrorBadEscape, // bad escape sequence ErrorBadCharClass, // bad character class ErrorBadCharRange, // bad character class range ErrorMissingBracket, // missing closing ] ErrorMissingParen, // missing closing ) ErrorTrailingBackslash, // trailing \ at end of regexp ErrorRepeatArgument, // repeat argument missing, e.g. "*" ErrorRepeatSize, // bad repetition argument ErrorRepeatOp, // bad repetition operator ErrorBadPerlOp, // bad perl operator ErrorBadUTF8, // invalid UTF-8 in regexp ErrorBadNamedCapture, // bad named capture group ErrorPatternTooLarge // pattern too large (compile failed) }; // Predefined common options. // If you need more complicated things, instantiate // an Option class, possibly passing one of these to // the Option constructor, change the settings, and pass that // Option class to the RE2 constructor. 
enum CannedOptions { DefaultOptions = 0, Latin1, // treat input as Latin-1 (default UTF-8) POSIX, // POSIX syntax, leftmost-longest match Quiet // do not log about regexp parse errors }; // Need to have the const char* and const std::string& forms for implicit // conversions when passing string literals to FullMatch and PartialMatch. // Otherwise the StringPiece form would be sufficient. #ifndef SWIG RE2(const char* pattern); RE2(const std::string& pattern); #endif RE2(const StringPiece& pattern); RE2(const StringPiece& pattern, const Options& options); ~RE2(); // Returns whether RE2 was created properly. bool ok() const { return error_code() == NoError; } // The string specification for this RE2. E.g. // RE2 re("ab*c?d+"); // re.pattern(); // "ab*c?d+" const std::string& pattern() const { return pattern_; } // If RE2 could not be created properly, returns an error string. // Else returns the empty string. const std::string& error() const { return *error_; } // If RE2 could not be created properly, returns an error code. // Else returns RE2::NoError (== 0). ErrorCode error_code() const { return error_code_; } // If RE2 could not be created properly, returns the offending // portion of the regexp. const std::string& error_arg() const { return error_arg_; } // Returns the program size, a very approximate measure of a regexp's "cost". // Larger numbers are more expensive than smaller numbers. int ProgramSize() const; int ReverseProgramSize() const; // EXPERIMENTAL! SUBJECT TO CHANGE! // Outputs the program fanout as a histogram bucketed by powers of 2. // Returns the number of the largest non-empty bucket. int ProgramFanout(std::map* histogram) const; int ReverseProgramFanout(std::map* histogram) const; // Returns the underlying Regexp; not for general use. // Returns entire_regexp_ so that callers don't need // to know about prefix_ and prefix_foldcase_. 
duckdb_re2::Regexp* Regexp() const { return entire_regexp_; } /***** The array-based matching interface ******/ // The functions here have names ending in 'N' and are used to implement // the functions whose names are the prefix before the 'N'. It is sometimes // useful to invoke them directly, but the syntax is awkward, so the 'N'-less // versions should be preferred. static bool FullMatchN(const StringPiece& text, const RE2& re, const Arg* const args[], int n); static bool PartialMatchN(const StringPiece& text, const RE2& re, const Arg* const args[], int n); static bool ConsumeN(StringPiece* input, const RE2& re, const Arg* const args[], int n); static bool FindAndConsumeN(StringPiece* input, const RE2& re, const Arg* const args[], int n); #ifndef SWIG private: template static inline bool Apply(F f, SP sp, const RE2& re) { return f(sp, re, NULL, 0); } template static inline bool Apply(F f, SP sp, const RE2& re, const A&... a) { const Arg* const args[] = {&a...}; const int n = sizeof...(a); return f(sp, re, args, n); } public: // In order to allow FullMatch() et al. to be called with a varying number // of arguments of varying types, we use two layers of variadic templates. // The first layer constructs the temporary Arg objects. The second layer // (above) constructs the array of pointers to the temporary Arg objects. /***** The useful part: the matching interface *****/ // Matches "text" against "re". If pointer arguments are // supplied, copies matched sub-patterns into them. // // You can pass in a "const char*" or a "std::string" for "text". // You can pass in a "const char*" or a "std::string" or a "RE2" for "re". 
// // The provided pointer arguments can be pointers to any scalar numeric // type, or one of: // std::string (matched piece is copied to string) // StringPiece (StringPiece is mutated to point to matched piece) // T (where "bool T::ParseFrom(const char*, size_t)" exists) // (void*)NULL (the corresponding matched sub-pattern is not copied) // // Returns true iff all of the following conditions are satisfied: // a. "text" matches "re" exactly // b. The number of matched sub-patterns is >= number of supplied pointers // c. The "i"th argument has a suitable type for holding the // string captured as the "i"th sub-pattern. If you pass in // NULL for the "i"th argument, or pass fewer arguments than // number of sub-patterns, "i"th captured sub-pattern is // ignored. // // CAVEAT: An optional sub-pattern that does not exist in the // matched string is assigned the empty string. Therefore, the // following will return false (because the empty string is not a // valid number): // int number; // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number); template static bool FullMatch(const StringPiece& text, const RE2& re, A&&... a) { return Apply(FullMatchN, text, re, Arg(std::forward(a))...); } // Exactly like FullMatch(), except that "re" is allowed to match // a substring of "text". template static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) { return Apply(PartialMatchN, text, re, Arg(std::forward(a))...); } // Like FullMatch() and PartialMatch(), except that "re" has to match // a prefix of the text, and "input" is advanced past the matched // text. Note: "input" is modified iff this routine returns true // and "re" matched a non-empty substring of "text". template static bool Consume(StringPiece* input, const RE2& re, A&&... a) { return Apply(ConsumeN, input, re, Arg(std::forward(a))...); } // Like Consume(), but does not anchor the match at the beginning of // the text. That is, "re" need not start its match at the beginning // of "input". 
For example, "FindAndConsume(s, "(\\w+)", &word)" finds // the next word in "s" and stores it in "word". template static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) { return Apply(FindAndConsumeN, input, re, Arg(std::forward(a))...); } #endif // Replace the first match of "re" in "str" with "rewrite". // Within "rewrite", backslash-escaped digits (\1 to \9) can be // used to insert text matching corresponding parenthesized group // from the pattern. \0 in "rewrite" refers to the entire matching // text. E.g., // // std::string s = "yabba dabba doo"; // CHECK(RE2::Replace(&s, "b+", "d")); // // will leave "s" containing "yada dabba doo" // // Returns true if the pattern matches and a replacement occurs, // false otherwise. static bool Replace(std::string* str, const RE2& re, const StringPiece& rewrite); // Like Replace(), except replaces successive non-overlapping occurrences // of the pattern in the string with the rewrite. E.g. // // std::string s = "yabba dabba doo"; // CHECK(RE2::GlobalReplace(&s, "b+", "d")); // // will leave "s" containing "yada dada doo" // Replacements are not subject to re-matching. // // Because GlobalReplace only replaces non-overlapping matches, // replacing "ana" within "banana" makes only one replacement, not two. // // Returns the number of replacements made. static int GlobalReplace(std::string* str, const RE2& re, const StringPiece& rewrite); // Like Replace, except that if the pattern matches, "rewrite" // is copied into "out" with substitutions. The non-matching // portions of "text" are ignored. // // Returns true iff a match occurred and the extraction happened // successfully; if no match occurs, the string is left unaffected. // // REQUIRES: "text" must not alias any part of "*out". static bool Extract(const StringPiece& text, const RE2& re, const StringPiece& rewrite, std::string* out); // Escapes all potentially meaningful regexp characters in // 'unquoted'. 
The returned string, used as a regular expression, // will exactly match the original string. For example, // 1.5-2.0? // may become: // 1\.5\-2\.0\? static std::string QuoteMeta(const StringPiece& unquoted); // Computes range for any strings matching regexp. The min and max can in // some cases be arbitrarily precise, so the caller gets to specify the // maximum desired length of string returned. // // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any // string s that is an anchored match for this regexp satisfies // min <= s && s <= max. // // Note that PossibleMatchRange() will only consider the first copy of an // infinitely repeated element (i.e., any regexp element followed by a '*' or // '+' operator). Regexps with "{N}" constructions are not affected, as those // do not compile down to infinite repetitions. // // Returns true on success, false on error. bool PossibleMatchRange(std::string* min, std::string* max, int maxlen) const; // Generic matching interface // Type of match. enum Anchor { UNANCHORED, // No anchoring ANCHOR_START, // Anchor at start only ANCHOR_BOTH // Anchor at start and end }; Anchor Anchored() const; // Return the number of capturing subpatterns, or -1 if the // regexp wasn't valid on construction. The overall match ($0) // does not count: if the regexp is "(a)(b)", returns 2. int NumberOfCapturingGroups() const { return num_captures_; } // Return a map from names to capturing indices. // The map records the index of the leftmost group // with the given name. // Only valid until the re is deleted. const std::map& NamedCapturingGroups() const; // Return a map from capturing indices to names. // The map has no entries for unnamed groups. // Only valid until the re is deleted. const std::map& CapturingGroupNames() const; // General matching routine. // Match against text starting at offset startpos // and stopping the search at offset endpos. // Returns true if match found, false if not. 
// On a successful match, fills in submatch[] (up to nsubmatch entries) // with information about submatches. // I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, with // submatch[0] = "barbaz", submatch[1].data() = NULL, submatch[2] = "bar", // submatch[3].data() = NULL, ..., up to submatch[nsubmatch-1].data() = NULL. // Caveat: submatch[] may be clobbered even on match failure. // // Don't ask for more match information than you will use: // runs much faster with nsubmatch == 1 than nsubmatch > 1, and // runs even faster if nsubmatch == 0. // Doesn't make sense to use nsubmatch > 1 + NumberOfCapturingGroups(), // but will be handled correctly. // // Passing text == StringPiece(NULL, 0) will be handled like any other // empty string, but note that on return, it will not be possible to tell // whether submatch i matched the empty string or did not match: // either way, submatch[i].data() == NULL. bool Match(const StringPiece& text, size_t startpos, size_t endpos, Anchor re_anchor, StringPiece* submatch, int nsubmatch) const; // Check that the given rewrite string is suitable for use with this // regular expression. It checks that: // * The regular expression has enough parenthesized subexpressions // to satisfy all of the \N tokens in rewrite // * The rewrite string doesn't have any syntax errors. E.g., // '\' followed by anything other than a digit or '\'. // A true return value guarantees that Replace() and Extract() won't // fail because of a bad rewrite string. bool CheckRewriteString(const StringPiece& rewrite, std::string* error) const; // Returns the maximum submatch needed for the rewrite to be done by // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2. static int MaxSubmatch(const StringPiece& rewrite); // Append the "rewrite" string, with backslash subsitutions from "vec", // to string "out". // Returns true on success. This method can fail because of a malformed // rewrite string. 
CheckRewriteString guarantees that the rewrite will // be sucessful. bool Rewrite(std::string* out, const StringPiece& rewrite, const StringPiece* vec, int veclen) const; // Constructor options class Options { public: // The options are (defaults in parentheses): // // utf8 (true) text and pattern are UTF-8; otherwise Latin-1 // posix_syntax (false) restrict regexps to POSIX egrep syntax // longest_match (false) search for longest match, not first match // log_errors (true) log syntax and execution errors to ERROR // max_mem (see below) approx. max memory footprint of RE2 // literal (false) interpret string as literal, not regexp // never_nl (false) never match \n, even if it is in regexp // dot_nl (false) dot matches everything including new line // never_capture (false) parse all parens as non-capturing // case_sensitive (true) match is case-sensitive (regexp can override // with (?i) unless in posix_syntax mode) // // The following options are only consulted when posix_syntax == true. // When posix_syntax == false, these features are always enabled and // cannot be turned off; to perform multi-line matching in that case, // begin the regexp with (?m). // perl_classes (false) allow Perl's \d \s \w \D \S \W // word_boundary (false) allow Perl's \b \B (word boundary and not) // one_line (false) ^ and $ only match beginning and end of text // // The max_mem option controls how much memory can be used // to hold the compiled form of the regexp (the Prog) and // its cached DFA graphs. Code Search placed limits on the number // of Prog instructions and DFA states: 10,000 for both. // In RE2, those limits would translate to about 240 KB per Prog // and perhaps 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 does a // better job of keeping them small than Code Search did). // Each RE2 has two Progs (one forward, one reverse), and each Prog // can have two DFAs (one first match, one longest match). 
// That makes 4 DFAs: // // forward, first-match - used for UNANCHORED or ANCHOR_START searches // if opt.longest_match() == false // forward, longest-match - used for all ANCHOR_BOTH searches, // and the other two kinds if // opt.longest_match() == true // reverse, first-match - never used // reverse, longest-match - used as second phase for unanchored searches // // The RE2 memory budget is statically divided between the two // Progs and then the DFAs: two thirds to the forward Prog // and one third to the reverse Prog. The forward Prog gives half // of what it has left over to each of its DFAs. The reverse Prog // gives it all to its longest-match DFA. // // Once a DFA fills its budget, it flushes its cache and starts over. // If this happens too often, RE2 falls back on the NFA implementation. // For now, make the default budget something close to Code Search. static const int kDefaultMaxMem = 8<<20; enum Encoding { EncodingUTF8 = 1, EncodingLatin1 }; Options() : encoding_(EncodingUTF8), posix_syntax_(false), longest_match_(false), log_errors_(true), max_mem_(kDefaultMaxMem), literal_(false), never_nl_(false), dot_nl_(false), never_capture_(false), case_sensitive_(true), perl_classes_(false), word_boundary_(false), one_line_(false) { } /*implicit*/ Options(CannedOptions); Encoding encoding() const { return encoding_; } void set_encoding(Encoding encoding) { encoding_ = encoding; } // Legacy interface to encoding. // TODO(rsc): Remove once clients have been converted. 
bool utf8() const { return encoding_ == EncodingUTF8; } void set_utf8(bool b) { if (b) { encoding_ = EncodingUTF8; } else { encoding_ = EncodingLatin1; } } bool posix_syntax() const { return posix_syntax_; } void set_posix_syntax(bool b) { posix_syntax_ = b; } bool longest_match() const { return longest_match_; } void set_longest_match(bool b) { longest_match_ = b; } bool log_errors() const { return log_errors_; } void set_log_errors(bool b) { log_errors_ = b; } int64_t max_mem() const { return max_mem_; } void set_max_mem(int64_t m) { max_mem_ = m; } bool literal() const { return literal_; } void set_literal(bool b) { literal_ = b; } bool never_nl() const { return never_nl_; } void set_never_nl(bool b) { never_nl_ = b; } bool dot_nl() const { return dot_nl_; } void set_dot_nl(bool b) { dot_nl_ = b; } bool never_capture() const { return never_capture_; } void set_never_capture(bool b) { never_capture_ = b; } bool case_sensitive() const { return case_sensitive_; } void set_case_sensitive(bool b) { case_sensitive_ = b; } bool perl_classes() const { return perl_classes_; } void set_perl_classes(bool b) { perl_classes_ = b; } bool word_boundary() const { return word_boundary_; } void set_word_boundary(bool b) { word_boundary_ = b; } bool one_line() const { return one_line_; } void set_one_line(bool b) { one_line_ = b; } void Copy(const Options& src) { *this = src; } int ParseFlags() const; private: Encoding encoding_; bool posix_syntax_; bool longest_match_; bool log_errors_; int64_t max_mem_; bool literal_; bool never_nl_; bool dot_nl_; bool never_capture_; bool case_sensitive_; bool perl_classes_; bool word_boundary_; bool one_line_; }; // Returns the options set in the constructor. const Options& options() const { return options_; } // Argument converters; see below. 
static inline Arg CRadix(short* x); static inline Arg CRadix(unsigned short* x); static inline Arg CRadix(int* x); static inline Arg CRadix(unsigned int* x); static inline Arg CRadix(long* x); static inline Arg CRadix(unsigned long* x); static inline Arg CRadix(long long* x); static inline Arg CRadix(unsigned long long* x); static inline Arg Hex(short* x); static inline Arg Hex(unsigned short* x); static inline Arg Hex(int* x); static inline Arg Hex(unsigned int* x); static inline Arg Hex(long* x); static inline Arg Hex(unsigned long* x); static inline Arg Hex(long long* x); static inline Arg Hex(unsigned long long* x); static inline Arg Octal(short* x); static inline Arg Octal(unsigned short* x); static inline Arg Octal(int* x); static inline Arg Octal(unsigned int* x); static inline Arg Octal(long* x); static inline Arg Octal(unsigned long* x); static inline Arg Octal(long long* x); static inline Arg Octal(unsigned long long* x); private: void Init(const StringPiece& pattern, const Options& options); bool DoMatch(const StringPiece& text, Anchor re_anchor, size_t* consumed, const Arg* const args[], int n) const; duckdb_re2::Prog* ReverseProg() const; std::string pattern_; // string regular expression Options options_; // option flags std::string prefix_; // required prefix (before regexp_) bool prefix_foldcase_; // prefix is ASCII case-insensitive duckdb_re2::Regexp* entire_regexp_; // parsed regular expression duckdb_re2::Regexp* suffix_regexp_; // parsed regular expression, prefix removed duckdb_re2::Prog* prog_; // compiled program for regexp int num_captures_; // Number of capturing groups bool is_one_pass_; // can use prog_->SearchOnePass? 
mutable duckdb_re2::Prog* rprog_; // reverse program for regexp mutable const std::string* error_; // Error indicator // (or points to empty string) mutable ErrorCode error_code_; // Error code mutable std::string error_arg_; // Fragment of regexp showing error // Map from capture names to indices mutable const std::map* named_groups_; // Map from capture indices to names mutable const std::map* group_names_; // Onces for lazy computations. mutable std::once_flag rprog_once_; mutable std::once_flag named_groups_once_; mutable std::once_flag group_names_once_; RE2(const RE2&) = delete; RE2& operator=(const RE2&) = delete; }; /***** Implementation details *****/ // Hex/Octal/Binary? // Special class for parsing into objects that define a ParseFrom() method template class _RE2_MatchObject { public: static inline bool Parse(const char* str, size_t n, void* dest) { if (dest == NULL) return true; T* object = reinterpret_cast(dest); return object->ParseFrom(str, n); } }; class RE2::Arg { public: // Empty constructor so we can declare arrays of RE2::Arg Arg(); // Constructor specially designed for NULL arguments Arg(void*); Arg(std::nullptr_t); typedef bool (*Parser)(const char* str, size_t n, void* dest); // Type-specific parsers #define MAKE_PARSER(type, name) \ Arg(type* p) : arg_(p), parser_(name) {} \ Arg(type* p, Parser parser) : arg_(p), parser_(parser) {} MAKE_PARSER(char, parse_char) MAKE_PARSER(signed char, parse_schar) MAKE_PARSER(unsigned char, parse_uchar) MAKE_PARSER(float, parse_float) MAKE_PARSER(double, parse_double) MAKE_PARSER(std::string, parse_string) MAKE_PARSER(StringPiece, parse_stringpiece) MAKE_PARSER(short, parse_short) MAKE_PARSER(unsigned short, parse_ushort) MAKE_PARSER(int, parse_int) MAKE_PARSER(unsigned int, parse_uint) MAKE_PARSER(long, parse_long) MAKE_PARSER(unsigned long, parse_ulong) MAKE_PARSER(long long, parse_longlong) MAKE_PARSER(unsigned long long, parse_ulonglong) #undef MAKE_PARSER // Generic constructor templates template 
Arg(T* p) : arg_(p), parser_(_RE2_MatchObject::Parse) { } template Arg(T* p, Parser parser) : arg_(p), parser_(parser) { } // Parse the data bool Parse(const char* str, size_t n) const; private: void* arg_; Parser parser_; static bool parse_null (const char* str, size_t n, void* dest); static bool parse_char (const char* str, size_t n, void* dest); static bool parse_schar (const char* str, size_t n, void* dest); static bool parse_uchar (const char* str, size_t n, void* dest); static bool parse_float (const char* str, size_t n, void* dest); static bool parse_double (const char* str, size_t n, void* dest); static bool parse_string (const char* str, size_t n, void* dest); static bool parse_stringpiece (const char* str, size_t n, void* dest); #define DECLARE_INTEGER_PARSER(name) \ private: \ static bool parse_##name(const char* str, size_t n, void* dest); \ static bool parse_##name##_radix(const char* str, size_t n, void* dest, \ int radix); \ \ public: \ static bool parse_##name##_hex(const char* str, size_t n, void* dest); \ static bool parse_##name##_octal(const char* str, size_t n, void* dest); \ static bool parse_##name##_cradix(const char* str, size_t n, void* dest); DECLARE_INTEGER_PARSER(short) DECLARE_INTEGER_PARSER(ushort) DECLARE_INTEGER_PARSER(int) DECLARE_INTEGER_PARSER(uint) DECLARE_INTEGER_PARSER(long) DECLARE_INTEGER_PARSER(ulong) DECLARE_INTEGER_PARSER(longlong) DECLARE_INTEGER_PARSER(ulonglong) #undef DECLARE_INTEGER_PARSER }; inline RE2::Arg::Arg() : arg_(NULL), parser_(parse_null) { } inline RE2::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } inline RE2::Arg::Arg(std::nullptr_t p) : arg_(p), parser_(parse_null) { } inline bool RE2::Arg::Parse(const char* str, size_t n) const { return (*parser_)(str, n, arg_); } // This part of the parser, appropriate only for ints, deals with bases #define MAKE_INTEGER_PARSER(type, name) \ inline RE2::Arg RE2::Hex(type* ptr) { \ return RE2::Arg(ptr, RE2::Arg::parse_##name##_hex); \ } \ inline RE2::Arg 
RE2::Octal(type* ptr) { \ return RE2::Arg(ptr, RE2::Arg::parse_##name##_octal); \ } \ inline RE2::Arg RE2::CRadix(type* ptr) { \ return RE2::Arg(ptr, RE2::Arg::parse_##name##_cradix); \ } MAKE_INTEGER_PARSER(short, short) MAKE_INTEGER_PARSER(unsigned short, ushort) MAKE_INTEGER_PARSER(int, int) MAKE_INTEGER_PARSER(unsigned int, uint) MAKE_INTEGER_PARSER(long, long) MAKE_INTEGER_PARSER(unsigned long, ulong) MAKE_INTEGER_PARSER(long long, longlong) MAKE_INTEGER_PARSER(unsigned long long, ulonglong) #undef MAKE_INTEGER_PARSER #ifndef SWIG // Helper for writing global or static RE2s safely. // Write // static LazyRE2 re = {".*"}; // and then use *re instead of writing // static RE2 re(".*"); // The former is more careful about multithreaded // situations than the latter. // // N.B. This class never deletes the RE2 object that // it constructs: that's a feature, so that it can be used // for global and function static variables. class LazyRE2 { private: struct NoArg {}; public: typedef RE2 element_type; // support std::pointer_traits // Constructor omitted to preserve braced initialization in C++98. // Pretend to be a pointer to Type (never NULL due to on-demand creation): RE2& operator*() const { return *get(); } RE2* operator->() const { return get(); } // Named accessor/initializer: RE2* get() const { std::call_once(once_, &LazyRE2::Init, this); return ptr_; } // All data fields must be public to support {"foo"} initialization. const char* pattern_; RE2::CannedOptions options_; NoArg barrier_against_excess_initializers_; mutable RE2* ptr_; mutable std::once_flag once_; private: static void Init(const LazyRE2* lazy_re2) { lazy_re2->ptr_ = new RE2(lazy_re2->pattern_, lazy_re2->options_); } void operator=(const LazyRE2&); // disallowed }; #endif // SWIG } // namespace duckdb_re2 using duckdb_re2::RE2; using duckdb_re2::LazyRE2; #endif // RE2_RE2_H_ // LICENSE_CHANGE_END #include #include namespace duckdb { class HivePartitioning { public: //! 
Parse a filename that follows the hive partitioning scheme DUCKDB_API static std::map Parse(const string &filename); DUCKDB_API static std::map Parse(const string &filename, duckdb_re2::RE2 ®ex); //! Prunes a list of filenames based on a set of filters, can be used by TableFunctions in the //! pushdown_complex_filter function to skip files with filename-based filters. Also removes the filters that always //! evaluate to true. DUCKDB_API static void ApplyFiltersToFileList(ClientContext &context, vector &files, vector> &filters, unordered_map &column_map, idx_t table_index, bool hive_enabled, bool filename_enabled); //! Returns the compiled regex pattern to match hive partitions DUCKDB_API static const string REGEX_STRING; }; struct HivePartitionKey { //! Columns by which we want to partition vector values; //! Precomputed hash of values hash_t hash; struct Hash { std::size_t operator()(const HivePartitionKey &k) const { return k.hash; } }; struct Equality { bool operator()(const HivePartitionKey &a, const HivePartitionKey &b) const { if (a.values.size() != b.values.size()) { return false; } for (idx_t i = 0; i < a.values.size(); i++) { if (!Value::NotDistinctFrom(a.values[i], b.values[i])) { return false; } } return true; } }; }; //! Maps hive partitions to partition_ids typedef unordered_map hive_partition_map_t; //! class shared between HivePartitionColumnData classes that synchronizes partition discovery between threads. //! each HivePartitionedColumnData will hold a local copy of the key->partition map class GlobalHivePartitionState { public: mutex lock; hive_partition_map_t partition_map; //! 
Used for incremental updating local copies of the partition map; vector partitions; }; class HivePartitionedColumnData : public PartitionedColumnData { public: HivePartitionedColumnData(ClientContext &context, vector types, vector partition_by_cols, shared_ptr global_state = nullptr) : PartitionedColumnData(PartitionedColumnDataType::HIVE, context, std::move(types)), global_state(std::move(global_state)), group_by_columns(std::move(partition_by_cols)), hashes_v(LogicalType::HASH) { InitializeKeys(); } HivePartitionedColumnData(const HivePartitionedColumnData &other); void ComputePartitionIndices(PartitionedColumnDataAppendState &state, DataChunk &input) override; //! Reverse lookup map to reconstruct keys from a partition id std::map GetReverseMap(); protected: //! Create allocators for all currently registered partitions void GrowAllocators(); //! Create append states for all currently registered partitions void GrowAppendState(PartitionedColumnDataAppendState &state); //! Create and initialize partitions for all currently registered partitions void GrowPartitions(PartitionedColumnDataAppendState &state); //! Register a newly discovered partition idx_t RegisterNewPartition(HivePartitionKey key, PartitionedColumnDataAppendState &state); //! Copy the newly added entries in the global_state.map to the local_partition_map (requires lock!) void SynchronizeLocalMap(); private: void InitializeKeys(); protected: //! Shared HivePartitionedColumnData should always have a global state to allow parallel key discovery shared_ptr global_state; //! Thread-local copy of the partition map hive_partition_map_t local_partition_map; //! The columns that make up the key vector group_by_columns; //! Thread-local pre-allocated vector for hashes Vector hashes_v; //! 
Thread-local pre-allocated HivePartitionKeys vector keys; }; } // namespace duckdb namespace duckdb { class Serializer; class Deserializer; struct BindInfo; struct MultiFileReaderOptions { bool filename = false; bool hive_partitioning = false; bool auto_detect_hive_partitioning = true; bool union_by_name = false; DUCKDB_API void Serialize(Serializer &serializer) const; DUCKDB_API static MultiFileReaderOptions Deserialize(Deserializer &source); DUCKDB_API void AddBatchInfo(BindInfo &bind_info) const; static bool AutoDetectHivePartitioning(const vector &files) { if (files.empty()) { return false; } std::unordered_set uset; idx_t splits_size; { // front file auto splits = StringUtil::Split(files.front(), FileSystem::PathSeparator()); splits_size = splits.size(); if (splits.size() < 2) { return false; } for (auto it = splits.begin(); it != std::prev(splits.end()); it++) { auto part = StringUtil::Split(*it, "="); if (part.size() == 2) { uset.insert(part.front()); } } } if (uset.empty()) { return false; } for (auto &file : files) { auto splits = StringUtil::Split(file, FileSystem::PathSeparator()); if (splits.size() != splits_size) { return false; } for (auto it = splits.begin(); it != std::prev(splits.end()); it++) { auto part = StringUtil::Split(*it, "="); if (part.size() == 2) { if (uset.find(part.front()) == uset.end()) { return false; } } } } return true; } }; } // namespace duckdb namespace duckdb { enum class NewLineIdentifier : uint8_t { SINGLE = 1, // Either \r or \n CARRY_ON = 2, // \r\n MIX = 3, // Hippie-Land, can't run it multithreaded NOT_SET = 4 }; enum class ParallelMode { AUTOMATIC = 0, PARALLEL = 1, SINGLE_THREADED = 2 }; struct BufferedCSVReaderOptions { //===--------------------------------------------------------------------===// // CommonCSVOptions //===--------------------------------------------------------------------===// //! Whether or not a delimiter was defined by the user bool has_delimiter = false; //! 
Delimiter to separate columns within each line string delimiter = ","; //! Whether or not a new_line was defined by the user bool has_newline = false; //! New Line separator NewLineIdentifier new_line = NewLineIdentifier::NOT_SET; //! Whether or not a quote was defined by the user bool has_quote = false; //! Quote used for columns that contain reserved characters, e.g., delimiter string quote = "\""; //! Whether or not an escape character was defined by the user bool has_escape = false; //! Escape character to escape quote character string escape; //! Whether or not a header information was given by the user bool has_header = false; //! Whether or not the file has a header line bool header = false; //! Whether or not we should ignore InvalidInput errors bool ignore_errors = false; //! Expected number of columns idx_t num_cols = 0; //! Number of samples to buffer idx_t buffer_sample_size = STANDARD_VECTOR_SIZE * 50; //! Specifies the string that represents a null value string null_str; //! Whether file is compressed or not, and if so which compression type //! AUTO_DETECT (default; infer from file extension) FileCompressionType compression = FileCompressionType::AUTO_DETECT; //! Option to convert quoted values to NULL values bool allow_quoted_nulls = true; //===--------------------------------------------------------------------===// // CSVAutoOptions //===--------------------------------------------------------------------===// //! SQL Type list mapping of name to SQL type index in sql_type_list case_insensitive_map_t sql_types_per_column; //! User-defined SQL type list vector sql_type_list; //! User-defined name list vector name_list; //! 
Types considered as candidates for auto detection ordered by descending specificity (~ from high to low) vector auto_type_candidates = {LogicalType::VARCHAR, LogicalType::TIMESTAMP, LogicalType::DATE, LogicalType::TIME, LogicalType::DOUBLE, LogicalType::BIGINT, LogicalType::BOOLEAN, LogicalType::SQLNULL}; //===--------------------------------------------------------------------===// // ReadCSVOptions //===--------------------------------------------------------------------===// //! How many leading rows to skip idx_t skip_rows = 0; //! Whether or not the skip_rows is set by the user bool skip_rows_set = false; //! Maximum CSV line size: specified because if we reach this amount, we likely have wrong delimiters (default: 2MB) //! note that this is the guaranteed line length that will succeed, longer lines may be accepted if slightly above idx_t maximum_line_size = 2097152; //! Whether or not header names shall be normalized bool normalize_names = false; //! True, if column with that index must skip null check vector force_not_null; //! Consider all columns to be of type varchar bool all_varchar = false; //! Size of sample chunk used for dialect and type detection idx_t sample_chunk_size = STANDARD_VECTOR_SIZE; //! Number of sample chunks used for type detection idx_t sample_chunks = 10; //! Whether or not to automatically detect dialect and datatypes bool auto_detect = false; //! The file path of the CSV file to read string file_path; //! Multi-file reader options MultiFileReaderOptions file_options; //! Buffer Size (Parallel Scan) idx_t buffer_size = CSVBuffer::INITIAL_BUFFER_SIZE_COLOSSAL; //! Decimal separator when reading as numeric string decimal_separator = "."; //! Whether or not to pad rows that do not have enough columns with NULL values bool null_padding = false; //! If we are running the parallel version of the CSV Reader. In general, the system should always auto-detect //! When it can't execute a parallel run before execution. 
However, there are (rather specific) situations where //! setting up this manually might be important ParallelMode parallel_mode; //===--------------------------------------------------------------------===// // WriteCSVOptions //===--------------------------------------------------------------------===// //! True, if column with that index must be quoted vector force_quote; //! Prefix/suffix/custom newline the entire file once (enables writing of files as JSON arrays) string prefix; string suffix; string write_newline; //! The date format to use (if any is specified) std::map date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}}; //! The date format to use for writing (if any is specified) std::map write_date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}}; //! Whether or not a type format is specified std::map has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}}; void Serialize(FieldWriter &writer) const; void Deserialize(FieldReader &reader); void SetCompression(const string &compression); void SetHeader(bool has_header); void SetEscape(const string &escape); void SetQuote(const string "e); void SetDelimiter(const string &delimiter); void SetNewline(const string &input); //! Set an option that is supported by both reading and writing functions, called by //! the SetReadOption and SetWriteOption methods bool SetBaseOption(const string &loption, const Value &value); //! loption - lowercase string //! set - argument(s) to the option //! 
//! expected_names - names expected if the option is "columns"
	void SetReadOption(const string &loption, const Value &value, vector &expected_names);
	void SetWriteOption(const string &loption, const Value &value);
	void SetDateFormat(LogicalTypeId type, const string &format, bool read_format);

	std::string ToString() const;
};
} // namespace duckdb
//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/common/multi_file_reader.hpp
//
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/common/union_by_name.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! Static helpers used when several files are read as one relation with columns matched by name
class UnionByName {
public:
	//! Declared here and defined elsewhere. UnionCols calls it once per reader, passing that reader's
	//! names and types together with the accumulated union types, union names and name map.
	static void CombineUnionTypes(const vector &new_names, const vector &new_types, vector &union_col_types,
	                              vector &union_col_names, case_insensitive_map_t &union_names_map);

	//! Union all files(readers) by their col names
	//! Builds one reader per entry of `files` (constructed from context, file name and options), feeds each
	//! reader's GetNames()/GetTypes() into CombineUnionTypes and returns the readers in the order of `files`.
	//! union_col_types and union_col_names are in/out parameters that receive the combined schema.
	template
	static vector> UnionCols(ClientContext &context, const vector &files, vector &union_col_types,
	                         vector &union_col_names, OPTION_TYPE &options) {
		vector> union_readers;
		// local to this call; handed to CombineUnionTypes on every iteration
		case_insensitive_map_t union_names_map;
		for (idx_t file_idx = 0; file_idx < files.size(); ++file_idx) {
			const auto file_name = files[file_idx];
			auto reader = make_uniq(context, file_name, options);
			auto &col_names = reader->GetNames();
			auto &sql_types = reader->GetTypes();
			CombineUnionTypes(col_names, sql_types, union_col_types, union_col_names, union_names_map);
			// ownership of the reader moves into the returned vector
			union_readers.push_back(std::move(reader));
		}
		return union_readers;
	}
};

} // namespace duckdb

namespace duckdb {
class TableFunction;
class TableFunctionSet;
class TableFilterSet;
class LogicalGet;
class Expression;
class ClientContext;
class DataChunk;

//! A (value, index) pair with serialization support.
//! Presumably the element type of MultiFileReaderBindData::hive_partitioning_indexes - confirm.
struct HivePartitioningIndex {
	HivePartitioningIndex(string value, idx_t index);

	string value;
	idx_t index;

	DUCKDB_API void Serialize(Serializer &serializer) const;
	DUCKDB_API static HivePartitioningIndex Deserialize(Deserializer &source);
};

//! The bind data for the multi-file reader, obtained through MultiFileReader::BindReader
struct MultiFileReaderBindData {
	//! The index of the filename column (if any)
	idx_t filename_idx = DConstants::INVALID_INDEX;
	//! The set of hive partitioning indexes (if any)
	vector hive_partitioning_indexes;

	DUCKDB_API void Serialize(Serializer &serializer) const;
	DUCKDB_API static MultiFileReaderBindData Deserialize(Deserializer &source);
};

//! An index plus a flag telling whether it refers to a constant.
//! Presumably the element type of MultiFileReaderData::filter_map - confirm.
struct MultiFileFilterEntry {
	idx_t index = DConstants::INVALID_INDEX;
	bool is_constant = false;
};

//! A constant value bound to a column id
struct MultiFileConstantEntry {
	MultiFileConstantEntry(idx_t column_id, Value value_p) : column_id(column_id), value(std::move(value_p)) {
	}
	//! The column id to apply the constant value to
	idx_t column_id;
	//! The constant value
	Value value;
};

struct MultiFileReaderData {
	//! The column ids to read from the file
	vector column_ids;
	//!
//! The mapping of column id -> result column id
	//! The result chunk will be filled as follows: chunk.data[column_mapping[i]] = ReadColumn(column_ids[i]);
	vector column_mapping;
	//! Whether or not there are no columns to read. This can happen when a file only consists of constants
	bool empty_columns = false;
	//! Filters can point to either (1) local columns in the file, or (2) constant values in the `constant_map`
	//! This map specifies where the to-be-filtered value can be found
	vector filter_map;
	//! The set of table filters
	optional_ptr filters;
	//! The constants that should be applied at the various positions
	vector constant_map;
	//! Map of column_id -> cast, used when reading multiple files when files have diverging types
	//! for the same column
	unordered_map cast_map;
};

//! Collection of static helpers shared by table functions that read multiple files
struct MultiFileReader {
	//! Add the parameters for multi-file readers (e.g. union_by_name, filename) to a table function
	DUCKDB_API static void AddParameters(TableFunction &table_function);
	//! Performs any globbing for the multi-file reader and returns a list of files to be read
	DUCKDB_API static vector GetFileList(ClientContext &context, const Value &input, const string &name,
	                                     FileGlobOptions options = FileGlobOptions::DISALLOW_EMPTY);
	//! Parse the named parameters of a multi-file reader
	DUCKDB_API static bool ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options);
	//! Perform complex filter pushdown into the multi-file reader, potentially filtering out files that should be read
	//! If "true" the first file has been eliminated
	DUCKDB_API static bool ComplexFilterPushdown(ClientContext &context, vector &files,
	                                             const MultiFileReaderOptions &options, LogicalGet &get,
	                                             vector> &filters);
	//! Bind the options of the multi-file reader, potentially emitting any extra columns that are required
	DUCKDB_API static MultiFileReaderBindData BindOptions(MultiFileReaderOptions &options, const vector &files,
	                                                      vector &return_types, vector &names);
	//! Finalize the bind phase of the multi-file reader after we know (1) the required (output) columns, and (2) the
	//! pushed down table filters
	DUCKDB_API static void FinalizeBind(const MultiFileReaderOptions &file_options,
	                                    const MultiFileReaderBindData &options, const string &filename,
	                                    const vector &local_names, const vector &global_types,
	                                    const vector &global_names, const vector &global_column_ids,
	                                    MultiFileReaderData &reader_data);
	//! Create all required mappings from the global types/names to the file-local types/names
	DUCKDB_API static void CreateMapping(const string &file_name, const vector &local_types,
	                                     const vector &local_names, const vector &global_types,
	                                     const vector &global_names, const vector &global_column_ids,
	                                     optional_ptr filters, MultiFileReaderData &reader_data,
	                                     const string &initial_file);
	//! Finalize the reading of a chunk - applying any constants that are required
	DUCKDB_API static void FinalizeChunk(const MultiFileReaderBindData &bind_data,
	                                     const MultiFileReaderData &reader_data, DataChunk &chunk);
	//! Creates a table function set from a single reader function (including e.g. list parameters, etc)
	DUCKDB_API static TableFunctionSet CreateFunctionSet(TableFunction table_function);

	//! Bind for the union_by_name case (asserted on entry).
	//! Collects the union schema through UnionByName::UnionCols, moves the opened readers into
	//! result.union_readers, runs BindOptions on the union names/types and then copies them into
	//! names / return_types. result is initialized from the first union reader.
	template
	static MultiFileReaderBindData BindUnionReader(ClientContext &context, vector &return_types, vector &names,
	                                               RESULT_CLASS &result, OPTIONS_CLASS &options) {
		D_ASSERT(options.file_options.union_by_name);
		vector union_col_names;
		vector union_col_types;
		// obtain the set of union column names + types by unifying the types of all of the files
		// note that this requires opening readers for each file and reading the metadata of each file
		auto union_readers = UnionByName::UnionCols(context, result.files, union_col_types, union_col_names, options);

		std::move(union_readers.begin(), union_readers.end(), std::back_inserter(result.union_readers));
		// perform the binding on the obtained set of names + types
		auto bind_data =
		    MultiFileReader::BindOptions(options.file_options, result.files, union_col_types, union_col_names);
		names = union_col_names;
		return_types = union_col_types;
		// NOTE(review): indexes union_readers[0] unconditionally - assumes result.files is never empty here; confirm
		result.Initialize(result.union_readers[0]);
		D_ASSERT(names.size() == return_types.size());
		return bind_data;
	}

	//! Bind entry point: forwards to BindUnionReader when union_by_name is set. Otherwise a single reader is
	//! created for result.files[0], its return_types/names are copied out, the reader is handed to
	//! result.Initialize and the result of BindOptions is returned.
	template
	static MultiFileReaderBindData BindReader(ClientContext &context, vector &return_types, vector &names,
	                                          RESULT_CLASS &result, OPTIONS_CLASS &options) {
		if (options.file_options.union_by_name) {
			return BindUnionReader(context, return_types, names, result, options);
		} else {
			shared_ptr reader;
			reader = make_shared(context, result.files[0], options);
			return_types = reader->return_types;
			names = reader->names;
			result.Initialize(std::move(reader));
			return MultiFileReader::BindOptions(options.file_options, result.files, return_types, names);
		}
	}

	//! Prepares reader.reader_data for one file: FinalizeBind first, then CreateMapping, and finally the
	//! table filters are stored in reader.reader_data.filters.
	template
	static void InitializeReader(READER_CLASS &reader, const MultiFileReaderOptions &options,
	                             const MultiFileReaderBindData &bind_data, const vector &global_types,
	                             const vector &global_names, const vector &global_column_ids,
	                             optional_ptr table_filters, const string &initial_file) {
		FinalizeBind(options, bind_data, reader.GetFileName(), reader.GetNames(), global_types, global_names,
		             global_column_ids, reader.reader_data);
		CreateMapping(reader.GetFileName(), reader.GetTypes(), reader.GetNames(), global_types, global_names,
		              global_column_ids, table_filters, reader.reader_data, initial_file);
		reader.reader_data.filters = table_filters;
	}

	//! Drops readers whose file name is no longer present in data.files: data.initial_reader is reset and
	//! matching entries of data.union_readers are erased.
	template
	static void PruneReaders(BIND_DATA &data) {
		// set of the file names that are still part of the scan
		unordered_set file_set;
		for (auto &file : data.files) {
			file_set.insert(file);
		}
		if (data.initial_reader) {
			// check if the initial reader should still be read
			auto entry = file_set.find(data.initial_reader->GetFileName());
			if (entry == file_set.end()) {
				data.initial_reader.reset();
			}
		}
		for (idx_t r = 0; r < data.union_readers.size(); r++) {
			// check if the union reader should still be read or not
			auto entry = file_set.find(data.union_readers[r]->GetFileName());
			if (entry == file_set.end()) {
				data.union_readers.erase(data.union_readers.begin() + r);
				// step back so the element that moved into slot r is examined next
				// NOTE(review): at r == 0 this relies on idx_t being unsigned (wraps, then r++ restores 0) - confirm
				r--;
				continue;
			}
		}
	}

private:
	static void CreateNameMapping(const string &file_name, const vector &local_types, const vector &local_names,
	                              const vector &global_types, const vector &global_names,
	                              const vector &global_column_ids, MultiFileReaderData &reader_data,
	                              const string &initial_file);
};

} // namespace duckdb
//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/execution/operator/persistent/csv_line_info.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
//! Line bookkeeping for CSV reading; holds references to a mutex and to per-batch tuple
//! start/end vectors that are owned by the caller of the constructor
struct LineInfo {
public:
	explicit LineInfo(mutex &main_mutex_p, vector> &batch_to_tuple_end_p, vector> &tuple_start_p,
	                  vector> &tuple_end_p)
	    : main_mutex(main_mutex_p), batch_to_tuple_end(batch_to_tuple_end_p), tuple_start(tuple_start_p),
	      tuple_end(tuple_end_p) {};
	bool CanItGetLine(idx_t file_idx, idx_t batch_idx);

	//! Return the 1-indexed line number
	idx_t GetLine(idx_t batch_idx, idx_t line_error = 0, idx_t file_idx = 0, idx_t cur_start = 0, bool verify = true);
	//!
//! Verify if the CSV File was read correctly from [0,batch_idx] batches.
	void Verify(idx_t file_idx, idx_t batch_idx, idx_t cur_first_pos);
	//! Lines read per batch,
	unordered_map lines_read;
	//! Set of batches that have been initialized but are not yet finished.
	vector> current_batches;
	//! Pointer to CSV Reader Mutex
	mutex &main_mutex;
	//! Pointer Batch to Tuple End
	vector> &batch_to_tuple_end;
	//! Pointer Batch to Tuple Start
	vector> &tuple_start;
	//! Pointer Batch to Tuple End
	vector> &tuple_end;
	//! If we already threw an exception on a previous thread.
	bool done = false;
	idx_t first_line = 0;
};

} // namespace duckdb

#include

namespace duckdb {
struct CopyInfo;
struct CSVFileHandle;
struct FileHandle;
struct StrpTimeFormat;

class FileOpener;
class FileSystem;

//! The mode a reader is operating in (stored in BaseCSVReader::mode)
enum class ParserMode : uint8_t { PARSING = 0, SNIFFING_DIALECT = 1, SNIFFING_DATATYPES = 2, PARSING_HEADER = 3 };

//! Buffered CSV reader is a class that reads values from a stream and parses them as a CSV file
//! NOTE(review): the class declares a virtual member (GetLineError) but its destructor is not virtual;
//! destroying a derived reader through a BaseCSVReader pointer would be undefined behaviour - confirm that
//! never happens or make the destructor virtual.
class BaseCSVReader {
public:
	BaseCSVReader(ClientContext &context, BufferedCSVReaderOptions options,
	              const vector &requested_types = vector());
	~BaseCSVReader();

	ClientContext &context;
	FileSystem &fs;
	Allocator &allocator;
	//! Reader options; held by value (the constructor takes them by value as well)
	BufferedCSVReaderOptions options;
	//! Column types and names exposed through GetTypes() / GetNames()
	vector return_types;
	vector names;
	MultiFileReaderData reader_data;

	idx_t linenr = 0;
	bool linenr_estimated = false;

	bool row_empty = false;
	idx_t sample_chunk_idx = 0;
	bool jumping_samples = false;
	bool end_of_file_reached = false;
	bool bom_checked = false;

	idx_t bytes_in_chunk = 0;
	double bytes_per_line_avg = 0;

	DataChunk parse_chunk;

	ParserMode mode;

public:
	//! Returns options.file_path
	const string &GetFileName() {
		return options.file_path;
	}
	const vector &GetNames() {
		return names;
	}
	const vector &GetTypes() {
		return return_types;
	}
	//! Get the 1-indexed global line number for the given local error line
	//! This base implementation ignores buffer_idx and returns line_error + 1
	virtual idx_t GetLineError(idx_t line_error, idx_t buffer_idx) {
		return line_error + 1;
	};

	//! Initialize projection indices to select all columns
	void InitializeProjection();

protected:
	//! Initializes the parse_chunk with varchar columns and aligns info with new number of cols
	void InitParseChunk(idx_t num_cols);
	//! Change the date format for the type to the string
	void SetDateFormat(const string &format_specifier, const LogicalTypeId &sql_type);
	//! Try to cast a string value to the specified sql type
	bool TryCastValue(const Value &value, const LogicalType &sql_type);
	//! Try to cast a vector of values to the specified sql type
	bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type);

	//! Adds a value to the current row
	void AddValue(string_t str_val, idx_t &column, vector &escape_positions, bool has_quotes, idx_t buffer_idx = 0);
	//! Adds a row to the insert_chunk, returns true if the chunk is filled as a result of this row being added
	bool AddRow(DataChunk &insert_chunk, idx_t &column, string &error_message, idx_t buffer_idx = 0);
	//! Finalizes a chunk, parsing all values that have been added so far and adding them to the insert_chunk
	bool Flush(DataChunk &insert_chunk, idx_t buffer_idx = 0, bool try_add_line = false);

	unique_ptr OpenCSV(const BufferedCSVReaderOptions &options);

	void VerifyUTF8(idx_t col_idx);
	void VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, int64_t offset = 0);
	string GetLineNumberStr(idx_t linenr, bool linenr_estimated, idx_t buffer_idx = 0);

	//! Sets the newline delimiter
	void SetNewLineDelimiter(bool carry = false, bool carry_followed_by_nl = false);

protected:
	//! Whether or not the current row's columns have overflown return_types.size()
	bool error_column_overflow = false;
	//! Number of sniffed columns - only used when auto-detecting
	vector sniffed_column_counts;
};

} // namespace duckdb
//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/main/error_manager.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
class ClientContext;
class DatabaseInstance;

enum class ErrorType : uint16_t {
	// error message types
	UNSIGNED_EXTENSION = 0,
	INVALIDATED_TRANSACTION = 1,
	INVALIDATED_DATABASE = 2,

	// this should always be the last value
	ERROR_COUNT,
	INVALID = 65535,
};

//! The error manager class is responsible for formatting error messages
//! It allows for error messages to be overridden by extensions and clients
class ErrorManager {
public:
	//! Entry point: starts with an empty value list and lets the recursive overloads collect the parameters
	template
	string FormatException(ErrorType error_type, Args... params) {
		vector values;
		return FormatExceptionRecursive(error_type, values, params...);
	}

	//! Overload taken once all parameters have been consumed (declared here, defined elsewhere)
	DUCKDB_API string FormatExceptionRecursive(ErrorType error_type, vector &values);

	//! Converts the first remaining parameter with ExceptionFormatValue::CreateFormatValue, appends it to
	//! values and recurses on the rest
	template
	string FormatExceptionRecursive(ErrorType error_type, vector &values, T param, Args... params) {
		values.push_back(ExceptionFormatValue::CreateFormatValue(param));
		return FormatExceptionRecursive(error_type, values, params...);
	}

	//! Convenience overload: formats through the ErrorManager returned by Get(context)
	template
	static string FormatException(ClientContext &context, ErrorType error_type, Args... params) {
		return Get(context).FormatException(error_type, params...);
	}

	DUCKDB_API static string InvalidUnicodeError(const string &input, const string &context);

	//!
//! Adds a custom error for a specific error type
	void AddCustomError(ErrorType type, string new_error);

	DUCKDB_API static ErrorManager &Get(ClientContext &context);
	DUCKDB_API static ErrorManager &Get(DatabaseInstance &context);

private:
	//! Storage for the errors registered through AddCustomError - presumably keyed by ErrorType; confirm
	map custom_errors;
};

} // namespace duckdb
//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/main/extension_helper.hpp
//
//
//===----------------------------------------------------------------------===//

#include

namespace duckdb {
class DuckDB;

//! Outcome reported by ExtensionHelper::LoadExtension
enum class ExtensionLoadResult : uint8_t { LOADED_EXTENSION = 0, EXTENSION_UNKNOWN = 1, NOT_LOADED = 2 };

//! Entry returned by ExtensionHelper::GetDefaultExtension
struct DefaultExtension {
	const char *name;
	const char *description;
	bool statically_loaded;
};

//! Entry returned by ExtensionHelper::GetExtensionAlias
struct ExtensionAlias {
	const char *alias;
	const char *extension;
};

//! Result of ExtensionHelper::InitialLoad / TryInitialLoad
struct ExtensionInitResult {
	string filename;
	string basename;

	void *lib_hdl;
};

//! Static helpers for installing, locating and loading extensions
class ExtensionHelper {
public:
	static void LoadAllExtensions(DuckDB &db);

	static ExtensionLoadResult LoadExtension(DuckDB &db, const std::string &extension);

	static void InstallExtension(ClientContext &context, const string &extension, bool force_install);
	static void InstallExtension(DBConfig &config, FileSystem &fs, const string &extension, bool force_install);
	static void LoadExternalExtension(ClientContext &context, const string &extension);
	static void LoadExternalExtension(DatabaseInstance &db, FileSystem &fs, const string &extension);

	static string ExtensionDirectory(ClientContext &context);
	static string ExtensionDirectory(DBConfig &config, FileSystem &fs);

	//! Count/accessor pairs over the default-extension and alias tables
	static idx_t DefaultExtensionCount();
	static DefaultExtension GetDefaultExtension(idx_t index);

	static idx_t ExtensionAliasCount();
	static ExtensionAlias GetExtensionAlias(idx_t index);

	static const vector GetPublicKeys();

	// Returns extension name, or empty string if not a replacement open path
	static string ExtractExtensionPrefixFromPath(const string &path);

	//! Apply any known extension aliases
	static string ApplyExtensionAlias(string extension_name);

	static string GetExtensionName(const string &extension);
	static bool IsFullPath(const string &extension);

private:
	static void InstallExtensionInternal(DBConfig &config, ClientConfig *client_config, FileSystem &fs,
	                                     const string &local_path, const string &extension, bool force_install);
	static const vector PathComponents();
	static bool AllowAutoInstall(const string &extension);
	static ExtensionInitResult InitialLoad(DBConfig &config, FileSystem &fs, const string &extension);
	//! Variant of InitialLoad that reports through a bool return value and the `error` out-parameter
	static bool TryInitialLoad(DBConfig &config, FileSystem &fs, const string &extension, ExtensionInitResult &result,
	                           string &error);
	//! For tagged releases we use the tag, else we use the git commit hash
	static const string GetVersionDirectoryName();
	//! Version tags occur with and without 'v', tag in extension path is always with 'v'
	static const string NormalizeVersionTag(const string &version_tag);
	static bool IsRelease(const string &version_tag);
	static bool CreateSuggestions(const string &extension_name, string &message);

private:
	static ExtensionLoadResult LoadExtensionInternal(DuckDB &db, const std::string &extension, bool initial_load);
};

} // namespace duckdb
//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/main/capi/capi_internal.hpp
//
//
//===----------------------------------------------------------------------===//

#include
#include

// On Windows, map strdup to _strdup unless strdup is already defined as a macro
#ifdef _WIN32
#ifndef strdup
#define strdup _strdup
#endif
#endif

namespace duckdb {

// Wrapper structs: each bundles the owned C++ object(s) named by its members

struct DatabaseData {
	unique_ptr database;
};

struct PreparedStatementWrapper {
	unique_ptr statement;
	vector values;
};

struct ExtractStatementsWrapper {
	vector> statements;
	string error;
};

struct PendingStatementWrapper {
	unique_ptr statement;
	bool allow_streaming;
};

struct ArrowResultWrapper {
	unique_ptr result;
	unique_ptr current_chunk;
	ArrowOptions options;
};

struct AppenderWrapper {
	unique_ptr appender;
	string error;
};

//! Which API flavour a result has been accessed with (stored in DuckDBResultData::result_set_type)
enum class CAPIResultSetType : uint8_t {
	CAPI_RESULT_TYPE_NONE = 0,
	CAPI_RESULT_TYPE_MATERIALIZED,
	CAPI_RESULT_TYPE_STREAMING,
	CAPI_RESULT_TYPE_DEPRECATED
};

struct DuckDBResultData {
	//! The underlying query result
	unique_ptr result;
	// Results can only use either the new API or the old API, not a mix of the two
	// They start off as "none" and switch to one or the other when an API method is used
	CAPIResultSetType result_set_type;
};

// Helpers declared for the C API implementation (defined elsewhere)
duckdb_type ConvertCPPTypeToC(const LogicalType &type);
LogicalTypeId ConvertCTypeToCPP(duckdb_type c_type);
idx_t GetCTypeSize(duckdb_type type);
duckdb_state duckdb_translate_result(unique_ptr result, duckdb_result *out);
bool deprecated_materialize_result(duckdb_result *result);

} // namespace duckdb


// LICENSE_CHANGE_BEGIN
// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4
// See the end of this file for a list

/*
 Formatting library for C++

 Copyright (c) 2012 - present, Victor Zverovich

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice shall be
 included in all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 --- Optional exception to the license ---

 As an exception, if, as a result of your compiling your source code, portions
 of this Software are embedded into a machine-executable object form of such
 source code, you may redistribute such embedded portions in such object form
 without including the above copyright and permission notices.
 */

#ifndef FMT_FORMAT_H_
#define FMT_FORMAT_H_



// LICENSE_CHANGE_BEGIN
// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4
// See the end of this file for a list

// Formatting library for C++ - the core API
//
// Copyright (c) 2012 - present, Victor Zverovich
// All rights reserved.
//
// For the license information refer to format.h.

#ifndef FMT_CORE_H_
#define FMT_CORE_H_

#include  // std::FILE
#include
#include
#include
#include

// The fmt library version in the form major * 10000 + minor * 100 + patch.
#define FMT_VERSION 60102

// FMT_HAS_FEATURE / FMT_HAS_INCLUDE / FMT_HAS_CPP_ATTRIBUTE wrap the compiler's
// __has_* queries and evaluate to 0 when the query is unavailable.
#ifdef __has_feature
#  define FMT_HAS_FEATURE(x) __has_feature(x)
#else
#  define FMT_HAS_FEATURE(x) 0
#endif

#if defined(__has_include) && !defined(__INTELLISENSE__) && \
    !(defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1600)
#  define FMT_HAS_INCLUDE(x) __has_include(x)
#else
#  define FMT_HAS_INCLUDE(x) 0
#endif

#ifdef __has_cpp_attribute
#  define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
#else
#  define FMT_HAS_CPP_ATTRIBUTE(x) 0
#endif

// GCC version as major * 100 + minor; 0 for clang and non-GNU compilers.
#if defined(__GNUC__) && !defined(__clang__)
#  define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#else
#  define FMT_GCC_VERSION 0
#endif

#if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__)
#  define FMT_HAS_GXX_CXX11 FMT_GCC_VERSION
#else
#  define FMT_HAS_GXX_CXX11 0
#endif

#ifdef __NVCC__
#  define FMT_NVCC __NVCC__
#else
#  define FMT_NVCC 0
#endif

#ifdef _MSC_VER
#  define FMT_MSC_VER _MSC_VER
#else
#  define FMT_MSC_VER 0
#endif

// Check if relaxed C++14 constexpr is supported.
// GCC doesn't allow throw in constexpr until version 6 (bug 67371).
#if FMT_USE_CONSTEXPR # define FMT_CONSTEXPR inline # define FMT_CONSTEXPR_DECL #else # define FMT_CONSTEXPR inline # define FMT_CONSTEXPR_DECL #endif #ifndef FMT_OVERRIDE # if FMT_HAS_FEATURE(cxx_override) || \ (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900 # define FMT_OVERRIDE override # else # define FMT_OVERRIDE # endif #endif // Check if exceptions are disabled. #ifndef FMT_EXCEPTIONS # if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \ FMT_MSC_VER && !_HAS_EXCEPTIONS # define FMT_EXCEPTIONS 0 # else # define FMT_EXCEPTIONS 1 # endif #endif // Define FMT_USE_NOEXCEPT to make fmt use noexcept (C++11 feature). #ifndef FMT_USE_NOEXCEPT # define FMT_USE_NOEXCEPT 0 #endif #if FMT_USE_NOEXCEPT || FMT_HAS_FEATURE(cxx_noexcept) || \ (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900 # define FMT_DETECTED_NOEXCEPT noexcept # define FMT_HAS_CXX11_NOEXCEPT 1 #else # define FMT_DETECTED_NOEXCEPT throw() # define FMT_HAS_CXX11_NOEXCEPT 0 #endif #ifndef FMT_NOEXCEPT # if FMT_EXCEPTIONS || FMT_HAS_CXX11_NOEXCEPT # define FMT_NOEXCEPT FMT_DETECTED_NOEXCEPT # else # define FMT_NOEXCEPT # endif #endif // [[noreturn]] is disabled on MSVC because of bogus unreachable code warnings. #if FMT_EXCEPTIONS && FMT_HAS_CPP_ATTRIBUTE(noreturn) && !FMT_MSC_VER # define FMT_NORETURN [[noreturn]] #else # define FMT_NORETURN #endif #ifndef FMT_DEPRECATED # if (FMT_HAS_CPP_ATTRIBUTE(deprecated) && __cplusplus >= 201402L) || \ FMT_MSC_VER >= 1900 # define FMT_DEPRECATED [[deprecated]] # else # if defined(__GNUC__) || defined(__clang__) # define FMT_DEPRECATED __attribute__((deprecated)) # elif FMT_MSC_VER # define FMT_DEPRECATED __declspec(deprecated) # else # define FMT_DEPRECATED /* deprecated */ # endif # endif #endif // Workaround broken [[deprecated]] in the Intel compiler and NVCC. 
#if defined(__INTEL_COMPILER) || FMT_NVCC # define FMT_DEPRECATED_ALIAS #else # define FMT_DEPRECATED_ALIAS FMT_DEPRECATED #endif #ifndef FMT_BEGIN_NAMESPACE # if FMT_HAS_FEATURE(cxx_inline_namespaces) || FMT_GCC_VERSION >= 404 || \ FMT_MSC_VER >= 1900 # define FMT_INLINE_NAMESPACE inline namespace # define FMT_END_NAMESPACE \ } \ } # else # define FMT_INLINE_NAMESPACE namespace # define FMT_END_NAMESPACE \ } \ using namespace v6; \ } # endif # define FMT_BEGIN_NAMESPACE \ namespace duckdb_fmt { \ FMT_INLINE_NAMESPACE v6 { #endif #if !defined(FMT_HEADER_ONLY) && defined(_WIN32) # ifdef FMT_EXPORT # define FMT_API __declspec(dllexport) # elif defined(FMT_SHARED) # define FMT_API __declspec(dllimport) # define FMT_EXTERN_TEMPLATE_API FMT_API # endif #endif #ifndef FMT_API # define FMT_API #endif #ifndef FMT_EXTERN_TEMPLATE_API # define FMT_EXTERN_TEMPLATE_API #endif #ifndef FMT_HEADER_ONLY # define FMT_EXTERN extern #else # define FMT_EXTERN #endif // libc++ supports string_view in pre-c++17. #if (FMT_HAS_INCLUDE() && \ (__cplusplus > 201402L || defined(_LIBCPP_VERSION))) || \ (defined(_MSVC_LANG) && _MSVC_LANG > 201402L && _MSC_VER >= 1910) # include # define FMT_USE_STRING_VIEW #elif FMT_HAS_INCLUDE("experimental/string_view") && __cplusplus >= 201402L # include # define FMT_USE_EXPERIMENTAL_STRING_VIEW #endif FMT_BEGIN_NAMESPACE // Implementations of enable_if_t and other types for pre-C++14 systems. template using enable_if_t = typename std::enable_if::type; template using conditional_t = typename std::conditional::type; template using bool_constant = std::integral_constant; template using remove_reference_t = typename std::remove_reference::type; template using remove_const_t = typename std::remove_const::type; template using remove_cvref_t = typename std::remove_cv>::type; struct monostate {}; // An enable_if helper to be used in template parameters which results in much // shorter symbols: https://godbolt.org/z/sWw4vP. 
Extra parentheses are needed // to workaround a bug in MSVC 2019 (see #1140 and #1186). #define FMT_ENABLE_IF(...) enable_if_t<(__VA_ARGS__), int> = 0 namespace internal { // A workaround for gcc 4.8 to make void_t work in a SFINAE context. template struct void_t_impl { using type = void; }; #ifndef FMT_ASSERT #define FMT_ASSERT(condition, message) #endif #if defined(FMT_USE_STRING_VIEW) template using std_string_view = std::basic_string_view; #elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) template using std_string_view = std::experimental::basic_string_view; #else template struct std_string_view {}; #endif #ifdef FMT_USE_INT128 // Do nothing. #elif defined(__SIZEOF_INT128__) # define FMT_USE_INT128 1 using int128_t = __int128_t; using uint128_t = __uint128_t; #else # define FMT_USE_INT128 0 #endif #if !FMT_USE_INT128 struct int128_t {}; struct uint128_t {}; #endif // Casts a nonnegative integer to unsigned. template FMT_CONSTEXPR typename std::make_unsigned::type to_unsigned(Int value) { FMT_ASSERT(value >= 0, "negative value"); return static_cast::type>(value); } } // namespace internal template using void_t = typename internal::void_t_impl::type; /** An implementation of ``std::basic_string_view`` for pre-C++17. It provides a subset of the API. ``fmt::basic_string_view`` is used for format strings even if ``std::string_view`` is available to prevent issues when a library is compiled with a different ``-std`` option than the client code (which is not recommended). */ template class basic_string_view { private: const Char* data_; size_t size_; public: using char_type = Char; using iterator = const Char*; FMT_CONSTEXPR basic_string_view() FMT_NOEXCEPT : data_(nullptr), size_(0) {} /** Constructs a string reference object from a C string and a size. 
*/ FMT_CONSTEXPR basic_string_view(const Char* s, size_t count) FMT_NOEXCEPT : data_(s), size_(count) {} /** \rst Constructs a string reference object from a C string computing the size with ``std::char_traits::length``. \endrst */ basic_string_view(const Char* s) : data_(s), size_(std::char_traits::length(s)) {} /** Constructs a string reference from a ``std::basic_string`` object. */ template FMT_CONSTEXPR basic_string_view( const std::basic_string& s) FMT_NOEXCEPT : data_(s.data()), size_(s.size()) {} template < typename S, FMT_ENABLE_IF(std::is_same>::value)> FMT_CONSTEXPR basic_string_view(S s) FMT_NOEXCEPT : data_(s.data()), size_(s.size()) {} /** Returns a pointer to the string data. */ FMT_CONSTEXPR const Char* data() const { return data_; } /** Returns the string size. */ FMT_CONSTEXPR size_t size() const { return size_; } FMT_CONSTEXPR iterator begin() const { return data_; } FMT_CONSTEXPR iterator end() const { return data_ + size_; } FMT_CONSTEXPR const Char& operator[](size_t pos) const { return data_[pos]; } FMT_CONSTEXPR void remove_prefix(size_t n) { data_ += n; size_ -= n; } std::string to_string() { return std::string((char *) data(), size()); } // Lexicographically compare this string reference to other. int compare(basic_string_view other) const { size_t str_size = size_ < other.size_ ? size_ : other.size_; int result = std::char_traits::compare(data_, other.data_, str_size); if (result == 0) result = size_ == other.size_ ? 0 : (size_ < other.size_ ? 
-1 : 1); return result; } friend bool operator==(basic_string_view lhs, basic_string_view rhs) { return lhs.compare(rhs) == 0; } friend bool operator!=(basic_string_view lhs, basic_string_view rhs) { return lhs.compare(rhs) != 0; } friend bool operator<(basic_string_view lhs, basic_string_view rhs) { return lhs.compare(rhs) < 0; } friend bool operator<=(basic_string_view lhs, basic_string_view rhs) { return lhs.compare(rhs) <= 0; } friend bool operator>(basic_string_view lhs, basic_string_view rhs) { return lhs.compare(rhs) > 0; } friend bool operator>=(basic_string_view lhs, basic_string_view rhs) { return lhs.compare(rhs) >= 0; } }; using string_view = basic_string_view; using wstring_view = basic_string_view; // A UTF-8 code unit type. #if FMT_HAS_FEATURE(__cpp_char8_t) typedef char8_t fmt_char8_t; #else typedef char fmt_char8_t; #endif /** Specifies if ``T`` is a character type. Can be specialized by users. */ template struct is_char : std::false_type {}; template <> struct is_char : std::true_type {}; template <> struct is_char : std::true_type {}; template <> struct is_char : std::true_type {}; template <> struct is_char : std::true_type {}; /** \rst Returns a string view of `s`. In order to add custom string type support to {fmt} provide an overload of `to_string_view` for it in the same namespace as the type for the argument-dependent lookup to work. **Example**:: namespace my_ns { inline string_view to_string_view(const my_string& s) { return {s.data(), s.length()}; } } std::string message = fmt::format(my_string("The answer is {}"), 42); \endrst */ template ::value)> inline basic_string_view to_string_view(const Char* s) { return s; } template inline basic_string_view to_string_view( const std::basic_string& s) { return s; } template inline basic_string_view to_string_view(basic_string_view s) { return s; } template >::value)> inline basic_string_view to_string_view( internal::std_string_view s) { return s; } // A base class for compile-time strings. 
// It is defined in the fmt namespace to
// make formatting functions visible via ADL, e.g. format(fmt("{}"), 42).
struct compile_string {};

// True when S derives from compile_string.
template
struct is_compile_string : std::is_base_of {};

template ::value)>
FMT_CONSTEXPR basic_string_view to_string_view(const S& s) {
  return s;
}

namespace internal {
// Catch-all overload returning void; is_string below tests whether the selected
// to_string_view overload yields a class type, which this fallback does not.
void to_string_view(...);
using duckdb_fmt::v6::to_string_view;

// Specifies whether S is a string type convertible to fmt::basic_string_view.
// It should be a constexpr function but MSVC 2017 fails to compile it in
// enable_if and MSVC 2015 fails to compile it as an alias template.
template
struct is_string : std::is_class()))> {
};

// Primary template has no members; the specialization exposes the character
// type of the view that to_string_view returns for S.
template struct char_t_impl {};
template struct char_t_impl::value>> {
  using result = decltype(to_string_view(std::declval()));
  using type = typename result::char_type;
};

struct error_handler {
  FMT_CONSTEXPR error_handler() = default;
  FMT_CONSTEXPR error_handler(const error_handler&) = default;

  // This function is intentionally not constexpr to give a compile-time error.
  FMT_NORETURN FMT_API void on_error(std::string message);
};
}  // namespace internal

/** String's character type. */
template using char_t = typename internal::char_t_impl::type;

/**
  \rst
  Parsing context consisting of a format string range being parsed and an
  argument counter for automatic indexing.

  You can use one of the following type aliases for common character types:

  +-----------------------+-------------------------------------+
  | Type                  | Definition                          |
  +=======================+=====================================+
  | format_parse_context  | basic_format_parse_context    |
  +-----------------------+-------------------------------------+
  | wformat_parse_context | basic_format_parse_context |
  +-----------------------+-------------------------------------+
  \endrst
 */
template
class basic_format_parse_context : private ErrorHandler {
 private:
  basic_string_view format_str_;
  // >= 0: next index for automatic indexing; -1 once manual indexing was selected
  // (see next_arg_id / check_arg_id below).
  int next_arg_id_;

 public:
  using char_type = Char;
  using iterator = typename basic_string_view::iterator;

  explicit FMT_CONSTEXPR basic_format_parse_context(
      basic_string_view format_str, ErrorHandler eh = ErrorHandler())
      : ErrorHandler(eh), format_str_(format_str), next_arg_id_(0) {}

  /**
    Returns an iterator to the beginning of the format string range being
    parsed.
   */
  FMT_CONSTEXPR iterator begin() const FMT_NOEXCEPT {
    return format_str_.begin();
  }

  /**
    Returns an iterator past the end of the format string range being parsed.
   */
  FMT_CONSTEXPR iterator end() const FMT_NOEXCEPT { return format_str_.end(); }

  /** Advances the begin iterator to ``it``. */
  FMT_CONSTEXPR void advance_to(iterator it) {
    format_str_.remove_prefix(internal::to_unsigned(it - begin()));
  }

  /**
    Reports an error if using the manual argument indexing; otherwise returns
    the next argument index and switches to the automatic indexing.
   */
  FMT_CONSTEXPR int next_arg_id() {
    if (next_arg_id_ >= 0) return next_arg_id_++;
    on_error("cannot switch from manual to automatic argument indexing");
    return 0;
  }

  /**
    Reports an error if using the automatic argument indexing; otherwise
    switches to the manual indexing.
   */
  FMT_CONSTEXPR void check_arg_id(int) {
    if (next_arg_id_ > 0)
      on_error("cannot switch from automatic to manual argument indexing");
    else
      next_arg_id_ = -1;
  }

  // Overload taking a string-view argument: nothing to check.
  FMT_CONSTEXPR void check_arg_id(basic_string_view) {}

  // Forwards the message to the ErrorHandler base.
  FMT_CONSTEXPR void on_error(std::string message) {
    ErrorHandler::on_error(message);
  }

  // Returns a copy of the ErrorHandler base sub-object.
  FMT_CONSTEXPR ErrorHandler error_handler() const { return *this; }
};

using format_parse_context = basic_format_parse_context;
using wformat_parse_context = basic_format_parse_context;

// Deprecated aliases for the parse-context names above.
template
using basic_parse_context FMT_DEPRECATED_ALIAS =
    basic_format_parse_context;
using parse_context FMT_DEPRECATED_ALIAS = basic_format_parse_context;
using wparse_context FMT_DEPRECATED_ALIAS = basic_format_parse_context;

template class basic_format_arg;
template class basic_format_args;

// A formatter for objects of type T.
template
struct formatter {
  // A deleted default constructor indicates a disabled formatter.
  formatter() = delete;
};

template
struct FMT_DEPRECATED convert_to_int
    : bool_constant::value &&
                    std::is_convertible::value> {};

// Specifies if T has an enabled formatter specialization. A type can be
// formattable even if it doesn't have a formatter e.g. via a conversion.
template
using has_formatter =
    std::is_constructible>;

namespace internal {

/** A contiguous memory buffer with an optional growing ability. */
template class buffer {
 private:
  T* ptr_;
  std::size_t size_;
  std::size_t capacity_;

 protected:
  // Don't initialize ptr_ since it is not accessed to save a few cycles.
  buffer(std::size_t sz) FMT_NOEXCEPT : size_(sz), capacity_(sz) {}

  buffer(T* p = nullptr, std::size_t sz = 0, std::size_t cap = 0) FMT_NOEXCEPT
      : ptr_(p),
        size_(sz),
        capacity_(cap) {}

  /** Sets the buffer data and capacity. */
  void set(T* buf_data, std::size_t buf_capacity) FMT_NOEXCEPT {
    ptr_ = buf_data;
    capacity_ = buf_capacity;
  }

  /** Increases the buffer capacity to hold at least *capacity* elements.
*/ virtual void grow(std::size_t capacity) = 0; public: using value_type = T; using const_reference = const T&; buffer(const buffer&) = delete; void operator=(const buffer&) = delete; virtual ~buffer() = default; T* begin() FMT_NOEXCEPT { return ptr_; } T* end() FMT_NOEXCEPT { return ptr_ + size_; } /** Returns the size of this buffer. */ std::size_t size() const FMT_NOEXCEPT { return size_; } /** Returns the capacity of this buffer. */ std::size_t capacity() const FMT_NOEXCEPT { return capacity_; } /** Returns a pointer to the buffer data. */ T* data() FMT_NOEXCEPT { return ptr_; } /** Returns a pointer to the buffer data. */ const T* data() const FMT_NOEXCEPT { return ptr_; } /** Resizes the buffer. If T is a POD type new elements may not be initialized. */ void resize(std::size_t new_size) { reserve(new_size); size_ = new_size; } /** Clears this buffer. */ void clear() { size_ = 0; } /** Reserves space to store at least *capacity* elements. */ void reserve(std::size_t new_capacity) { if (new_capacity > capacity_) grow(new_capacity); } void push_back(const T& value) { reserve(size_ + 1); ptr_[size_++] = value; } /** Appends data to the end of the buffer. */ template void append(const U* begin, const U* end); T& operator[](std::size_t index) { return ptr_[index]; } const T& operator[](std::size_t index) const { return ptr_[index]; } }; // A container-backed buffer. template class container_buffer : public buffer { private: Container& container_; protected: void grow(std::size_t capacity) FMT_OVERRIDE { container_.resize(capacity); this->set(&container_[0], capacity); } public: explicit container_buffer(Container& c) : buffer(c.size()), container_(c) {} }; // Extracts a reference to the container from back_insert_iterator. 
template inline Container& get_container(std::back_insert_iterator it) { using bi_iterator = std::back_insert_iterator; struct accessor : bi_iterator { accessor(bi_iterator iter) : bi_iterator(iter) {} using bi_iterator::container; }; return *accessor(it).container; } template struct fallback_formatter { fallback_formatter() = delete; }; // Specifies if T has an enabled fallback_formatter specialization. template using has_fallback_formatter = std::is_constructible>; template struct named_arg_base; template struct named_arg; enum type { none_type, named_arg_type, // Integer types should go first, int_type, uint_type, long_long_type, ulong_long_type, int128_type, uint128_type, bool_type, char_type, last_integer_type = char_type, // followed by floating-point types. float_type, double_type, long_double_type, last_numeric_type = long_double_type, cstring_type, string_type, pointer_type, custom_type }; // Maps core type T to the corresponding type enum constant. template struct type_constant : std::integral_constant {}; #define FMT_TYPE_CONSTANT(Type, constant) \ template \ struct type_constant : std::integral_constant {} FMT_TYPE_CONSTANT(const named_arg_base&, named_arg_type); FMT_TYPE_CONSTANT(int, int_type); FMT_TYPE_CONSTANT(unsigned, uint_type); FMT_TYPE_CONSTANT(long long, long_long_type); FMT_TYPE_CONSTANT(unsigned long long, ulong_long_type); FMT_TYPE_CONSTANT(int128_t, int128_type); FMT_TYPE_CONSTANT(uint128_t, uint128_type); FMT_TYPE_CONSTANT(bool, bool_type); FMT_TYPE_CONSTANT(Char, char_type); FMT_TYPE_CONSTANT(float, float_type); FMT_TYPE_CONSTANT(double, double_type); FMT_TYPE_CONSTANT(long double, long_double_type); FMT_TYPE_CONSTANT(const Char*, cstring_type); FMT_TYPE_CONSTANT(basic_string_view, string_type); FMT_TYPE_CONSTANT(const void*, pointer_type); FMT_CONSTEXPR bool is_integral_type(type t) { FMT_ASSERT(t != named_arg_type, "invalid argument type"); return t > none_type && t <= last_integer_type; } FMT_CONSTEXPR bool is_arithmetic_type(type t) 
{ FMT_ASSERT(t != named_arg_type, "invalid argument type"); return t > none_type && t <= last_numeric_type; } template struct string_value { const Char* data; std::size_t size; }; template struct custom_value { using parse_context = basic_format_parse_context; const void* value; void (*format)(const void* arg, parse_context& parse_ctx, Context& ctx); }; // A formatting argument value. template class value { public: using char_type = typename Context::char_type; union { int int_value; unsigned uint_value; long long long_long_value; unsigned long long ulong_long_value; int128_t int128_value; uint128_t uint128_value; bool bool_value; char_type char_value; float float_value; double double_value; long double long_double_value; const void* pointer; string_value string; custom_value custom; const named_arg_base* named_arg; }; FMT_CONSTEXPR value(int val = 0) : int_value(val) {} FMT_CONSTEXPR value(unsigned val) : uint_value(val) {} value(long long val) : long_long_value(val) {} value(unsigned long long val) : ulong_long_value(val) {} value(int128_t val) : int128_value(val) {} value(uint128_t val) : uint128_value(val) {} value(float val) : float_value(val) {} value(double val) : double_value(val) {} value(long double val) : long_double_value(val) {} value(bool val) : bool_value(val) {} value(char_type val) : char_value(val) {} value(const char_type* val) { string.data = val; } value(basic_string_view val) { string.data = val.data(); string.size = val.size(); } value(const void* val) : pointer(val) {} template value(const T& val) { custom.value = &val; // Get the formatter type through the context to allow different contexts // have different extension points, e.g. `formatter` for `format` and // `printf_formatter` for `printf`. 
custom.format = format_custom_arg< T, conditional_t::value, typename Context::template formatter_type, fallback_formatter>>; } value(const named_arg_base& val) { named_arg = &val; } private: // Formats an argument of a custom type, such as a user-defined class. template static void format_custom_arg( const void* arg, basic_format_parse_context& parse_ctx, Context& ctx) { Formatter f; parse_ctx.advance_to(f.parse(parse_ctx)); ctx.advance_to(f.format(*static_cast(arg), ctx)); } }; template FMT_CONSTEXPR basic_format_arg make_arg(const T& value); // To minimize the number of types we need to deal with, long is translated // either to int or to long long depending on its size. enum { long_short = sizeof(long) == sizeof(int) }; using long_type = conditional_t; using ulong_type = conditional_t; // Maps formatting arguments to core types. template struct arg_mapper { using char_type = typename Context::char_type; FMT_CONSTEXPR int map(signed char val) { return val; } FMT_CONSTEXPR unsigned map(unsigned char val) { return val; } FMT_CONSTEXPR int map(short val) { return val; } FMT_CONSTEXPR unsigned map(unsigned short val) { return val; } FMT_CONSTEXPR int map(int val) { return val; } FMT_CONSTEXPR unsigned map(unsigned val) { return val; } FMT_CONSTEXPR long_type map(long val) { return val; } FMT_CONSTEXPR ulong_type map(unsigned long val) { return val; } FMT_CONSTEXPR long long map(long long val) { return val; } FMT_CONSTEXPR unsigned long long map(unsigned long long val) { return val; } FMT_CONSTEXPR int128_t map(int128_t val) { return val; } FMT_CONSTEXPR uint128_t map(uint128_t val) { return val; } FMT_CONSTEXPR bool map(bool val) { return val; } template ::value)> FMT_CONSTEXPR char_type map(T val) { static_assert( std::is_same::value || std::is_same::value, "mixing character types is disallowed"); return val; } FMT_CONSTEXPR float map(float val) { return val; } FMT_CONSTEXPR double map(double val) { return val; } FMT_CONSTEXPR long double map(long double val) { 
return val; } FMT_CONSTEXPR const char_type* map(char_type* val) { return val; } FMT_CONSTEXPR const char_type* map(const char_type* val) { return val; } template ::value)> FMT_CONSTEXPR basic_string_view map(const T& val) { static_assert(std::is_same>::value, "mixing character types is disallowed"); return to_string_view(val); } template , T>::value && !is_string::value)> FMT_CONSTEXPR basic_string_view map(const T& val) { return basic_string_view(val); } template < typename T, FMT_ENABLE_IF( std::is_constructible, T>::value && !std::is_constructible, T>::value && !is_string::value && !has_formatter::value)> FMT_CONSTEXPR basic_string_view map(const T& val) { return std_string_view(val); } FMT_CONSTEXPR const char* map(const signed char* val) { static_assert(std::is_same::value, "invalid string type"); return reinterpret_cast(val); } FMT_CONSTEXPR const char* map(const unsigned char* val) { static_assert(std::is_same::value, "invalid string type"); return reinterpret_cast(val); } FMT_CONSTEXPR const void* map(void* val) { return val; } FMT_CONSTEXPR const void* map(const void* val) { return val; } FMT_CONSTEXPR const void* map(std::nullptr_t val) { return val; } template FMT_CONSTEXPR int map(const T*) { // Formatting of arbitrary pointers is disallowed. If you want to output // a pointer cast it to "void *" or "const void *". In particular, this // forbids formatting of "[const] volatile char *" which is printed as bool // by iostreams. 
static_assert(!sizeof(T), "formatting of non-void pointers is disallowed"); return 0; } template ::value && !has_formatter::value && !has_fallback_formatter::value)> FMT_CONSTEXPR auto map(const T& val) -> decltype( map(static_cast::type>(val))) { return map(static_cast::type>(val)); } template < typename T, FMT_ENABLE_IF( !is_string::value && !is_char::value && !std::is_constructible, T>::value && (has_formatter::value || (has_fallback_formatter::value && !std::is_constructible, T>::value)))> FMT_CONSTEXPR const T& map(const T& val) { return val; } template FMT_CONSTEXPR const named_arg_base& map( const named_arg& val) { auto arg = make_arg(val.value); std::memcpy(val.data, &arg, sizeof(arg)); return val; } }; // A type constant after applying arg_mapper. template using mapped_type_constant = type_constant().map(std::declval())), typename Context::char_type>; enum { packed_arg_bits = 5 }; // Maximum number of arguments with packed types. enum { max_packed_args = 63 / packed_arg_bits }; enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; template class arg_map; } // namespace internal // A formatting argument. It is a trivially copyable/constructible type to // allow storage in basic_memory_buffer. 
template class basic_format_arg { private: internal::value value_; internal::type type_; template friend FMT_CONSTEXPR basic_format_arg internal::make_arg( const T& value); template friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)); friend class basic_format_args; friend class internal::arg_map; using char_type = typename Context::char_type; public: class handle { public: explicit handle(internal::custom_value custom) : custom_(custom) {} void format(basic_format_parse_context& parse_ctx, Context& ctx) const { custom_.format(custom_.value, parse_ctx, ctx); } private: internal::custom_value custom_; }; FMT_CONSTEXPR basic_format_arg() : type_(internal::none_type) {} FMT_CONSTEXPR explicit operator bool() const FMT_NOEXCEPT { return type_ != internal::none_type; } internal::type type() const { return type_; } bool is_integral() const { return internal::is_integral_type(type_); } bool is_arithmetic() const { return internal::is_arithmetic_type(type_); } }; /** \rst Visits an argument dispatching to the appropriate visit method based on the argument type. For example, if the argument type is ``double`` then ``vis(value)`` will be called with the value of type ``double``. 
\endrst */ template FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)) { using char_type = typename Context::char_type; switch (arg.type_) { case internal::none_type: break; case internal::named_arg_type: FMT_ASSERT(false, "invalid argument type"); break; case internal::int_type: return vis(arg.value_.int_value); case internal::uint_type: return vis(arg.value_.uint_value); case internal::long_long_type: return vis(arg.value_.long_long_value); case internal::ulong_long_type: return vis(arg.value_.ulong_long_value); #if FMT_USE_INT128 case internal::int128_type: return vis(arg.value_.int128_value); case internal::uint128_type: return vis(arg.value_.uint128_value); #else case internal::int128_type: case internal::uint128_type: break; #endif case internal::bool_type: return vis(arg.value_.bool_value); case internal::char_type: return vis(arg.value_.char_value); case internal::float_type: return vis(arg.value_.float_value); case internal::double_type: return vis(arg.value_.double_value); case internal::long_double_type: return vis(arg.value_.long_double_value); case internal::cstring_type: return vis(arg.value_.string.data); case internal::string_type: return vis(basic_string_view(arg.value_.string.data, arg.value_.string.size)); case internal::pointer_type: return vis(arg.value_.pointer); case internal::custom_type: return vis(typename basic_format_arg::handle(arg.value_.custom)); } return vis(monostate()); } namespace internal { // A map from argument names to their values for named arguments. 
template class arg_map { private: using char_type = typename Context::char_type; struct entry { basic_string_view name; basic_format_arg arg; }; entry* map_; unsigned size_; void push_back(value val) { const auto& named = *val.named_arg; map_[size_] = {named.name, named.template deserialize()}; ++size_; } public: arg_map(const arg_map&) = delete; void operator=(const arg_map&) = delete; arg_map() : map_(nullptr), size_(0) {} void init(const basic_format_args& args); ~arg_map() { delete[] map_; } basic_format_arg find(basic_string_view name) const { // The list is unsorted, so just return the first matching name. for (entry *it = map_, *end = map_ + size_; it != end; ++it) { if (it->name == name) return it->arg; } return {}; } }; // A type-erased reference to an std::locale to avoid heavy include. class locale_ref { private: const void* locale_; // A type-erased pointer to std::locale. public: locale_ref() : locale_(nullptr) {} template explicit locale_ref(const Locale& loc); explicit operator bool() const FMT_NOEXCEPT { return locale_ != nullptr; } template Locale get() const; }; template constexpr unsigned long long encode_types() { return 0; } template constexpr unsigned long long encode_types() { return mapped_type_constant::value | (encode_types() << packed_arg_bits); } template FMT_CONSTEXPR basic_format_arg make_arg(const T& value) { basic_format_arg arg; arg.type_ = mapped_type_constant::value; arg.value_ = arg_mapper().map(value); return arg; } template inline value make_arg(const T& val) { return arg_mapper().map(val); } template inline basic_format_arg make_arg(const T& value) { return make_arg(value); } } // namespace internal // Formatting context. template class basic_format_context { public: /** The character type for the output. 
*/ using char_type = Char; private: OutputIt out_; basic_format_args args_; internal::arg_map map_; internal::locale_ref loc_; public: using iterator = OutputIt; using format_arg = basic_format_arg; template using formatter_type = formatter; basic_format_context(const basic_format_context&) = delete; void operator=(const basic_format_context&) = delete; /** Constructs a ``basic_format_context`` object. References to the arguments are stored in the object so make sure they have appropriate lifetimes. */ basic_format_context(OutputIt out, basic_format_args ctx_args, internal::locale_ref loc = internal::locale_ref()) : out_(out), args_(ctx_args), loc_(loc) {} format_arg arg(int id) const { return args_.get(id); } // Checks if manual indexing is used and returns the argument with the // specified name. format_arg arg(basic_string_view name); internal::error_handler error_handler() { return {}; } void on_error(std::string message) { error_handler().on_error(message); } // Returns an iterator to the beginning of the output range. iterator out() { return out_; } // Advances the begin iterator to ``it``. void advance_to(iterator it) { out_ = it; } internal::locale_ref locale() { return loc_; } }; template using buffer_context = basic_format_context>, Char>; using format_context = buffer_context; using wformat_context = buffer_context; /** \rst An array of references to arguments. It can be implicitly converted into `~fmt::basic_format_args` for passing into type-erased formatting functions such as `~fmt::vformat`. \endrst */ template class format_arg_store { private: static const size_t num_args = sizeof...(Args); static const bool is_packed = num_args < internal::max_packed_args; using value_type = conditional_t, basic_format_arg>; // If the arguments are not packed, add one more element to mark the end. value_type data_[num_args + (num_args == 0 ? 1 : 0)]; friend class basic_format_args; public: static constexpr unsigned long long types = is_packed ? 
internal::encode_types() : internal::is_unpacked_bit | num_args; format_arg_store(const Args&... args) : data_{internal::make_arg(args)...} {} }; /** \rst Constructs an `~fmt::format_arg_store` object that contains references to arguments and can be implicitly converted to `~fmt::format_args`. `Context` can be omitted in which case it defaults to `~fmt::context`. See `~fmt::arg` for lifetime considerations. \endrst */ template inline format_arg_store make_format_args( const Args&... args) { return {args...}; } /** Formatting arguments. */ template class basic_format_args { public: using size_type = int; using format_arg = basic_format_arg; private: // To reduce compiled code size per formatting function call, types of first // max_packed_args arguments are passed in the types_ field. unsigned long long types_; union { // If the number of arguments is less than max_packed_args, the argument // values are stored in values_, otherwise they are stored in args_. // This is done to reduce compiled code size as storing larger objects // may require more code (at least on x86-64) even if the same amount of // data is actually copied to stack. It saves ~10% on the bloat test. 
const internal::value* values_; const format_arg* args_; }; bool is_packed() const { return (types_ & internal::is_unpacked_bit) == 0; } internal::type type(int index) const { int shift = index * internal::packed_arg_bits; unsigned int mask = (1 << internal::packed_arg_bits) - 1; return static_cast((types_ >> shift) & mask); } friend class internal::arg_map; void set_data(const internal::value* values) { values_ = values; } void set_data(const format_arg* args) { args_ = args; } format_arg do_get(int index) const { format_arg arg; if (!is_packed()) { auto num_args = max_size(); if (index < num_args) arg = args_[index]; return arg; } if (index > internal::max_packed_args) return arg; arg.type_ = type(index); if (arg.type_ == internal::none_type) return arg; internal::value& val = arg.value_; val = values_[index]; return arg; } public: basic_format_args() : types_(0) {} /** \rst Constructs a `basic_format_args` object from `~fmt::format_arg_store`. \endrst */ template basic_format_args(const format_arg_store& store) : types_(store.types) { set_data(store.data_); } /** \rst Constructs a `basic_format_args` object from a dynamic set of arguments. \endrst */ basic_format_args(const format_arg* args, int count) : types_(internal::is_unpacked_bit | internal::to_unsigned(count)) { set_data(args); } /** Returns the argument at specified index. */ format_arg get(int index) const { format_arg arg = do_get(index); if (arg.type_ == internal::named_arg_type) arg = arg.value_.named_arg->template deserialize(); return arg; } int max_size() const { unsigned long long max_packed = internal::max_packed_args; return static_cast(is_packed() ? max_packed : types_ & ~internal::is_unpacked_bit); } }; /** An alias to ``basic_format_args``. */ // It is a separate type rather than an alias to make symbols readable. struct format_args : basic_format_args { template format_args(Args&&... args) : basic_format_args(std::forward(args)...) 
{} }; struct wformat_args : basic_format_args { template wformat_args(Args&&... args) : basic_format_args(std::forward(args)...) {} }; template struct is_contiguous : std::false_type {}; template struct is_contiguous> : std::true_type {}; template struct is_contiguous> : std::true_type {}; namespace internal { template struct is_contiguous_back_insert_iterator : std::false_type {}; template struct is_contiguous_back_insert_iterator> : is_contiguous {}; template struct named_arg_base { basic_string_view name; // Serialized value. mutable char data[sizeof(basic_format_arg>)]; named_arg_base(basic_string_view nm) : name(nm) {} template basic_format_arg deserialize() const { basic_format_arg arg; std::memcpy(&arg, data, sizeof(basic_format_arg)); return arg; } }; template struct named_arg : named_arg_base { const T& value; named_arg(basic_string_view name, const T& val) : named_arg_base(name), value(val) {} }; template ::value)> inline void check_format_string(const S&) { #if defined(FMT_ENFORCE_COMPILE_STRING) static_assert(is_compile_string::value, "FMT_ENFORCE_COMPILE_STRING requires all format strings to " "utilize FMT_STRING() or fmt()."); #endif } template ::value)> void check_format_string(S); struct view {}; template struct bool_pack; template using all_true = std::is_same, bool_pack>; template > inline format_arg_store, remove_reference_t...> make_args_checked(const S& format_str, const remove_reference_t&... args) { static_assert(all_true<(!std::is_base_of>() || !std::is_reference())...>::value, "passing views as lvalues is disallowed"); check_format_string>...>(format_str); return {args...}; } template std::basic_string vformat(basic_string_view format_str, basic_format_args> args); template typename buffer_context::iterator vformat_to( buffer& buf, basic_string_view format_str, basic_format_args> args); } // namespace internal /** \rst Returns a named argument to be used in a formatting function. 
The named argument holds a reference and does not extend the lifetime of its arguments. Consequently, a dangling reference can accidentally be created. The user should take care to only pass this function temporaries when the named argument is itself a temporary, as per the following example. **Example**:: fmt::print("Elapsed time: {s:.2f} seconds", fmt::arg("s", 1.23)); \endrst */ template > inline internal::named_arg arg(const S& name, const T& arg) { static_assert(internal::is_string::value, ""); return {name, arg}; } // Disable nested named arguments, e.g. ``arg("a", arg("b", 42))``. template void arg(S, internal::named_arg) = delete; /** Formats a string and writes the output to ``out``. */ // GCC 8 and earlier cannot handle std::back_insert_iterator with // vformat_to(...) overload, so SFINAE on iterator type instead. template , FMT_ENABLE_IF( internal::is_contiguous_back_insert_iterator::value)> OutputIt vformat_to(OutputIt out, const S& format_str, basic_format_args> args) { using container = remove_reference_t; internal::container_buffer buf((internal::get_container(out))); internal::vformat_to(buf, to_string_view(format_str), args); return out; } template ::value&& internal::is_string::value)> inline std::back_insert_iterator format_to( std::back_insert_iterator out, const S& format_str, Args&&... args) { return vformat_to( out, to_string_view(format_str), {internal::make_args_checked(format_str, args...)}); } template > inline std::basic_string vformat( const S& format_str, basic_format_args> args) { return internal::vformat(to_string_view(format_str), args); } /** \rst Formats arguments and returns the result as a string. **Example**:: #include std::string message = fmt::format("The answer is {}", 42); \endrst */ // Pass char_t as a default template parameter instead of using // std::basic_string> to reduce the symbol size. template > inline std::basic_string format(const S& format_str, Args&&... 
args) { return internal::vformat( to_string_view(format_str), {internal::make_args_checked(format_str, args...)}); } FMT_END_NAMESPACE #endif // FMT_CORE_H_ // LICENSE_CHANGE_END #include #include #include #include #include #include #include #ifdef __clang__ # define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) #else # define FMT_CLANG_VERSION 0 #endif #ifdef __INTEL_COMPILER # define FMT_ICC_VERSION __INTEL_COMPILER #elif defined(__ICL) # define FMT_ICC_VERSION __ICL #else # define FMT_ICC_VERSION 0 #endif #ifdef __NVCC__ # define FMT_CUDA_VERSION (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__) #else # define FMT_CUDA_VERSION 0 #endif #ifdef __has_builtin # define FMT_HAS_BUILTIN(x) __has_builtin(x) #else # define FMT_HAS_BUILTIN(x) 0 #endif #if FMT_HAS_CPP_ATTRIBUTE(fallthrough) && \ (__cplusplus >= 201703 || FMT_GCC_VERSION != 0) # define FMT_FALLTHROUGH [[fallthrough]] #else # define FMT_FALLTHROUGH #endif #ifndef FMT_THROW # if FMT_EXCEPTIONS # if FMT_MSC_VER FMT_BEGIN_NAMESPACE namespace internal { template inline void do_throw(const Exception& x) { // Silence unreachable code warnings in MSVC because these are nearly // impossible to fix in a generic code. volatile bool b = true; if (b) throw x; } } // namespace internal FMT_END_NAMESPACE # define FMT_THROW(x) internal::do_throw(x) # else # define FMT_THROW(x) throw x # endif # else # define FMT_THROW(x) \ do { \ static_cast(sizeof(x)); \ FMT_ASSERT(false, ""); \ } while (false) # endif #endif #ifndef FMT_USE_USER_DEFINED_LITERALS // For Intel and NVIDIA compilers both they and the system gcc/msc support UDLs. 
# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ FMT_MSC_VER >= 1900) && \ (!(FMT_ICC_VERSION || FMT_CUDA_VERSION) || FMT_ICC_VERSION >= 1500 || \ FMT_CUDA_VERSION >= 700) # define FMT_USE_USER_DEFINED_LITERALS 1 # else # define FMT_USE_USER_DEFINED_LITERALS 0 # endif #endif #ifndef FMT_USE_UDL_TEMPLATE #define FMT_USE_UDL_TEMPLATE 0 #endif // __builtin_clz is broken in clang with Microsoft CodeGen: // https://github.com/fmtlib/fmt/issues/519 #if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clz)) && !FMT_MSC_VER # define FMT_BUILTIN_CLZ(n) __builtin_clz(n) #endif #if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clzll)) && !FMT_MSC_VER # define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n) #endif // Some compilers masquerade as both MSVC and GCC-likes or otherwise support // __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the // MSVC intrinsics if the clz and clzll builtins are not available. #if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(_MANAGED) # include // _BitScanReverse, _BitScanReverse64 FMT_BEGIN_NAMESPACE namespace internal { // Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning. # ifndef __clang__ # pragma intrinsic(_BitScanReverse) # endif inline uint32_t clz(uint32_t x) { unsigned long r = 0; _BitScanReverse(&r, x); FMT_ASSERT(x != 0, ""); // Static analysis complains about using uninitialized data // "r", but the only way that can happen is if "x" is 0, // which the callers guarantee to not happen. # pragma warning(suppress : 6102) return 31 - r; } # define FMT_BUILTIN_CLZ(n) internal::clz(n) # if defined(_WIN64) && !defined(__clang__) # pragma intrinsic(_BitScanReverse64) # endif inline uint32_t clzll(uint64_t x) { unsigned long r = 0; # ifdef _WIN64 _BitScanReverse64(&r, x); # else // Scan the high 32 bits. if (_BitScanReverse(&r, static_cast(x >> 32))) return 63 - (r + 32); // Scan the low 32 bits. 
_BitScanReverse(&r, static_cast(x)); # endif FMT_ASSERT(x != 0, ""); // Static analysis complains about using uninitialized data // "r", but the only way that can happen is if "x" is 0, // which the callers guarantee to not happen. # pragma warning(suppress : 6102) return 63 - r; } # define FMT_BUILTIN_CLZLL(n) internal::clzll(n) } // namespace internal FMT_END_NAMESPACE #endif // Enable the deprecated numeric alignment. #ifndef FMT_NUMERIC_ALIGN # define FMT_NUMERIC_ALIGN 1 #endif // Enable the deprecated percent specifier. #ifndef FMT_DEPRECATED_PERCENT # define FMT_DEPRECATED_PERCENT 0 #endif FMT_BEGIN_NAMESPACE namespace internal { // A helper function to suppress bogus "conditional expression is constant" // warnings. template inline T const_check(T value) { return value; } // An equivalent of `*reinterpret_cast(&source)` that doesn't have // undefined behavior (e.g. due to type aliasing). // Example: uint64_t d = bit_cast(2.718); template inline Dest bit_cast(const Source& source) { static_assert(sizeof(Dest) == sizeof(Source), "size mismatch"); Dest dest; std::memcpy(&dest, &source, sizeof(dest)); return dest; } inline bool is_big_endian() { auto u = 1u; struct bytes { char data[sizeof(u)]; }; return bit_cast(u).data[0] == 0; } // A fallback implementation of uintptr_t for systems that lack it. struct fallback_uintptr { unsigned char value[sizeof(void*)]; fallback_uintptr() = default; explicit fallback_uintptr(const void* p) { *this = bit_cast(p); if (is_big_endian()) { for (size_t i = 0, j = sizeof(void*) - 1; i < j; ++i, --j) std::swap(value[i], value[j]); } } }; #ifdef UINTPTR_MAX using uintptr_t = ::uintptr_t; inline uintptr_t to_uintptr(const void* p) { return bit_cast(p); } #else using uintptr_t = fallback_uintptr; inline fallback_uintptr to_uintptr(const void* p) { return fallback_uintptr(p); } #endif // Returns the largest possible value for type T. Same as // std::numeric_limits::max() but shorter and not affected by the max macro. 
template constexpr T max_value() { return (std::numeric_limits::max)(); } template constexpr int num_bits() { return std::numeric_limits::digits; } template <> constexpr int num_bits() { return static_cast(sizeof(void*) * std::numeric_limits::digits); } // An approximation of iterator_t for pre-C++20 systems. template using iterator_t = decltype(std::begin(std::declval())); // Detect the iterator category of *any* given type in a SFINAE-friendly way. // Unfortunately, older implementations of std::iterator_traits are not safe // for use in a SFINAE-context. template struct iterator_category : std::false_type {}; template struct iterator_category { using type = std::random_access_iterator_tag; }; template struct iterator_category> { using type = typename It::iterator_category; }; // Detect if *any* given type models the OutputIterator concept. template class is_output_iterator { // Check for mutability because all iterator categories derived from // std::input_iterator_tag *may* also meet the requirements of an // OutputIterator, thereby falling into the category of 'mutable iterators' // [iterator.requirements.general] clause 4. The compiler reveals this // property only at the point of *actually dereferencing* the iterator! template static decltype(*(std::declval())) test(std::input_iterator_tag); template static char& test(std::output_iterator_tag); template static const char& test(...); using type = decltype(test(typename iterator_category::type{})); public: static const bool value = !std::is_const>::value; }; // A workaround for std::string not having mutable data() until C++17. template inline Char* get_data(std::basic_string& s) { return &s[0]; } template inline typename Container::value_type* get_data(Container& c) { return c.data(); } #ifdef _SECURE_SCL // Make a checked iterator to avoid MSVC warnings. 
template using checked_ptr = stdext::checked_array_iterator; template checked_ptr make_checked(T* p, std::size_t size) { return {p, size}; } #else template using checked_ptr = T*; template inline T* make_checked(T* p, std::size_t) { return p; } #endif template ::value)> inline checked_ptr reserve( std::back_insert_iterator& it, std::size_t n) { Container& c = get_container(it); std::size_t size = c.size(); c.resize(size + n); return make_checked(get_data(c) + size, n); } template inline Iterator& reserve(Iterator& it, std::size_t) { return it; } // An output iterator that counts the number of objects written to it and // discards them. class counting_iterator { private: std::size_t count_; public: using iterator_category = std::output_iterator_tag; using difference_type = std::ptrdiff_t; using pointer = void; using reference = void; using _Unchecked_type = counting_iterator; // Mark iterator as checked. struct value_type { template void operator=(const T&) {} }; counting_iterator() : count_(0) {} std::size_t count() const { return count_; } counting_iterator& operator++() { ++count_; return *this; } counting_iterator operator++(int) { auto it = *this; ++*this; return it; } value_type operator*() const { return {}; } }; template class truncating_iterator_base { protected: OutputIt out_; std::size_t limit_; std::size_t count_; truncating_iterator_base(OutputIt out, std::size_t limit) : out_(out), limit_(limit), count_(0) {} public: using iterator_category = std::output_iterator_tag; using difference_type = void; using pointer = void; using reference = void; using _Unchecked_type = truncating_iterator_base; // Mark iterator as checked. OutputIt base() const { return out_; } std::size_t count() const { return count_; } }; // An output iterator that truncates the output and counts the number of objects // written to it. 
template ::value_type>::type> class truncating_iterator; template class truncating_iterator : public truncating_iterator_base { using traits = std::iterator_traits; mutable typename traits::value_type blackhole_; public: using value_type = typename traits::value_type; truncating_iterator(OutputIt out, std::size_t limit) : truncating_iterator_base(out, limit) {} truncating_iterator& operator++() { if (this->count_++ < this->limit_) ++this->out_; return *this; } truncating_iterator operator++(int) { auto it = *this; ++*this; return it; } value_type& operator*() const { return this->count_ < this->limit_ ? *this->out_ : blackhole_; } }; template class truncating_iterator : public truncating_iterator_base { public: using value_type = typename OutputIt::container_type::value_type; truncating_iterator(OutputIt out, std::size_t limit) : truncating_iterator_base(out, limit) {} truncating_iterator& operator=(value_type val) { if (this->count_++ < this->limit_) this->out_ = val; return *this; } truncating_iterator& operator++() { return *this; } truncating_iterator& operator++(int) { return *this; } truncating_iterator& operator*() { return *this; } }; // A range with the specified output iterator and value type. template class output_range { private: OutputIt it_; public: using value_type = T; using iterator = OutputIt; struct sentinel {}; explicit output_range(OutputIt it) : it_(it) {} OutputIt begin() const { return it_; } sentinel end() const { return {}; } // Sentinel is not used yet. }; template inline size_t count_code_points(basic_string_view s) { return s.size(); } // Counts the number of code points in a UTF-8 string. 
inline size_t count_code_points(basic_string_view s) { const fmt_char8_t* data = s.data(); size_t num_code_points = 0; for (size_t i = 0, size = s.size(); i != size; ++i) { if ((data[i] & 0xc0) != 0x80) ++num_code_points; } return num_code_points; } template inline size_t code_point_index(basic_string_view s, size_t n) { size_t size = s.size(); return n < size ? n : size; } // Calculates the index of the nth code point in a UTF-8 string. inline size_t code_point_index(basic_string_view s, size_t n) { const fmt_char8_t* data = s.data(); size_t num_code_points = 0; for (size_t i = 0, size = s.size(); i != size; ++i) { if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) { return i; } } return s.size(); } inline fmt_char8_t to_fmt_char8_t(char c) { return static_cast(c); } template using needs_conversion = bool_constant< std::is_same::value_type, char>::value && std::is_same::value>; template ::value)> OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) { return std::copy(begin, end, it); } template ::value)> OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) { return std::transform(begin, end, it, to_fmt_char8_t); } #ifndef FMT_USE_GRISU # define FMT_USE_GRISU 1 #endif template constexpr bool use_grisu() { return FMT_USE_GRISU && std::numeric_limits::is_iec559 && sizeof(T) <= sizeof(double); } template template void buffer::append(const U* begin, const U* end) { std::size_t new_size = size_ + to_unsigned(end - begin); reserve(new_size); std::uninitialized_copy(begin, end, make_checked(ptr_, capacity_) + size_); size_ = new_size; } } // namespace internal // A range with an iterator appending to a buffer. template class buffer_range : public internal::output_range< std::back_insert_iterator>, T> { public: using iterator = std::back_insert_iterator>; using internal::output_range::output_range; buffer_range(internal::buffer& buf) : internal::output_range(std::back_inserter(buf)) {} }; // A UTF-8 string view. 
class u8string_view : public basic_string_view { public: u8string_view(const char* s) : basic_string_view(reinterpret_cast(s)) {} u8string_view(const char* s, size_t count) FMT_NOEXCEPT : basic_string_view(reinterpret_cast(s), count) { } }; #if FMT_USE_USER_DEFINED_LITERALS inline namespace literals { inline u8string_view operator"" _u(const char* s, std::size_t n) { return {s, n}; } } // namespace literals #endif // The number of characters to store in the basic_memory_buffer object itself // to avoid dynamic memory allocation. enum { inline_buffer_size = 500 }; /** \rst A dynamically growing memory buffer for trivially copyable/constructible types with the first ``SIZE`` elements stored in the object itself. You can use one of the following type aliases for common character types: +----------------+------------------------------+ | Type | Definition | +================+==============================+ | memory_buffer | basic_memory_buffer | +----------------+------------------------------+ | wmemory_buffer | basic_memory_buffer | +----------------+------------------------------+ **Example**:: fmt::memory_buffer out; format_to(out, "The answer is {}.", 42); This will append the following output to the ``out`` object: .. code-block:: none The answer is 42. The output can be converted to an ``std::string`` with ``to_string(out)``. \endrst */ template > class basic_memory_buffer : private Allocator, public internal::buffer { private: T store_[SIZE]; // Deallocate memory allocated by the buffer. void deallocate() { T* data = this->data(); if (data != store_) Allocator::deallocate(data, this->capacity()); } protected: void grow(std::size_t size) FMT_OVERRIDE; public: using value_type = T; using const_reference = const T&; explicit basic_memory_buffer(const Allocator& alloc = Allocator()) : Allocator(alloc) { this->set(store_, SIZE); } ~basic_memory_buffer() FMT_OVERRIDE { deallocate(); } private: // Move data from other to this buffer. 
void move(basic_memory_buffer& other) { Allocator &this_alloc = *this, &other_alloc = other; this_alloc = std::move(other_alloc); T* data = other.data(); std::size_t size = other.size(), capacity = other.capacity(); if (data == other.store_) { this->set(store_, capacity); std::uninitialized_copy(other.store_, other.store_ + size, internal::make_checked(store_, capacity)); } else { this->set(data, capacity); // Set pointer to the inline array so that delete is not called // when deallocating. other.set(other.store_, 0); } this->resize(size); } public: /** \rst Constructs a :class:`fmt::basic_memory_buffer` object moving the content of the other object to it. \endrst */ basic_memory_buffer(basic_memory_buffer&& other) FMT_NOEXCEPT { move(other); } /** \rst Moves the content of the other ``basic_memory_buffer`` object to this one. \endrst */ basic_memory_buffer& operator=(basic_memory_buffer&& other) FMT_NOEXCEPT { FMT_ASSERT(this != &other, ""); deallocate(); move(other); return *this; } // Returns a copy of the allocator associated with this buffer. Allocator get_allocator() const { return *this; } }; template void basic_memory_buffer::grow(std::size_t size) { #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION if (size > 1000) throw std::runtime_error("fuzz mode - won't grow that much"); #endif std::size_t old_capacity = this->capacity(); std::size_t new_capacity = old_capacity + old_capacity / 2; if (size > new_capacity) new_capacity = size; T* old_data = this->data(); T* new_data = std::allocator_traits::allocate(*this, new_capacity); // The following code doesn't throw, so the raw pointer above doesn't leak. std::uninitialized_copy(old_data, old_data + this->size(), internal::make_checked(new_data, new_capacity)); this->set(new_data, new_capacity); // deallocate must not throw according to the standard, but even if it does, // the buffer already uses the new storage and will deallocate it in // destructor. 
if (old_data != store_) Allocator::deallocate(old_data, old_capacity); } using memory_buffer = basic_memory_buffer; using wmemory_buffer = basic_memory_buffer; namespace internal { // Returns true if value is negative, false otherwise. // Same as `value < 0` but doesn't produce warnings if T is an unsigned type. template ::is_signed)> FMT_CONSTEXPR bool is_negative(T value) { return value < 0; } template ::is_signed)> FMT_CONSTEXPR bool is_negative(T) { return false; } // Smallest of uint32_t, uint64_t, uint128_t that is large enough to // represent all values of T. template using uint32_or_64_or_128_t = conditional_t< std::numeric_limits::digits <= 32, uint32_t, conditional_t::digits <= 64, uint64_t, uint128_t>>; // Static data is placed in this class template for the header-only config. template struct FMT_EXTERN_TEMPLATE_API basic_data { static const uint64_t powers_of_10_64[]; static const uint32_t zero_or_powers_of_10_32[]; static const uint64_t zero_or_powers_of_10_64[]; static const uint64_t pow10_significands[]; static const int16_t pow10_exponents[]; static const char digits[]; static const char hex_digits[]; static const char foreground_color[]; static const char background_color[]; static const char reset_color[5]; static const wchar_t wreset_color[5]; static const char signs[]; }; FMT_EXTERN template struct basic_data; // This is a struct rather than an alias to avoid shadowing warnings in gcc. struct data : basic_data<> {}; #ifdef FMT_BUILTIN_CLZLL // Returns the number of decimal digits in n. Leading zeros are not counted // except for n == 0 in which case count_digits returns 1. inline int count_digits(uint64_t n) { // Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 // and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits. 
int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12; return t - (n < data::zero_or_powers_of_10_64[t]) + 1; } #else // Fallback version of count_digits used when __builtin_clz is not available. inline int count_digits(uint64_t n) { int count = 1; for (;;) { // Integer division is slow so do it for a group of four digits instead // of for every digit. The idea comes from the talk by Alexandrescu // "Three Optimization Tips for C++". See speed-test for a comparison. if (n < 10) return count; if (n < 100) return count + 1; if (n < 1000) return count + 2; if (n < 10000) return count + 3; n /= 10000u; count += 4; } } #endif #if FMT_USE_INT128 inline int count_digits(uint128_t n) { int count = 1; for (;;) { // Integer division is slow so do it for a group of four digits instead // of for every digit. The idea comes from the talk by Alexandrescu // "Three Optimization Tips for C++". See speed-test for a comparison. if (n < 10) return count; if (n < 100) return count + 1; if (n < 1000) return count + 2; if (n < 10000) return count + 3; n /= 10000U; count += 4; } } #endif // Counts the number of digits in n. BITS = log2(radix). template inline int count_digits(UInt n) { int num_digits = 0; do { ++num_digits; } while ((n >>= BITS) != 0); return num_digits; } template <> int count_digits<4>(internal::fallback_uintptr n); #if FMT_GCC_VERSION || FMT_CLANG_VERSION # define FMT_ALWAYS_INLINE inline __attribute__((always_inline)) #else # define FMT_ALWAYS_INLINE #endif #ifdef FMT_BUILTIN_CLZ // Optional version of count_digits for better performance on 32-bit platforms. 
inline int count_digits(uint32_t n) { int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12; return t - (n < data::zero_or_powers_of_10_32[t]) + 1; } #endif template FMT_API std::string grouping_impl(locale_ref loc); template inline std::string grouping(locale_ref loc) { return grouping_impl(loc); } template <> inline std::string grouping(locale_ref loc) { return grouping_impl(loc); } template FMT_API Char thousands_sep_impl(locale_ref loc); template inline Char thousands_sep(locale_ref loc) { return Char(thousands_sep_impl(loc)); } template <> inline wchar_t thousands_sep(locale_ref loc) { return thousands_sep_impl(loc); } template FMT_API Char decimal_point_impl(locale_ref loc); template inline Char decimal_point(locale_ref loc) { return Char(decimal_point_impl(loc)); } template <> inline wchar_t decimal_point(locale_ref loc) { return decimal_point_impl(loc); } // Formats a decimal unsigned integer value writing into buffer. // add_thousands_sep is called after writing each char to add a thousands // separator if necessary. template inline Char* format_decimal(Char* buffer, UInt value, int num_digits, F add_thousands_sep) { FMT_ASSERT(num_digits >= 0, "invalid digit count"); buffer += num_digits; Char* end = buffer; while (value >= 100) { // Integer division is slow so do it for a group of two digits instead // of for every digit. The idea comes from the talk by Alexandrescu // "Three Optimization Tips for C++". See speed-test for a comparison. 
auto index = static_cast((value % 100) * 2); value /= 100; *--buffer = static_cast(data::digits[index + 1]); add_thousands_sep(buffer); *--buffer = static_cast(data::digits[index]); add_thousands_sep(buffer); } if (value < 10) { *--buffer = static_cast('0' + value); return end; } auto index = static_cast(value * 2); *--buffer = static_cast(data::digits[index + 1]); add_thousands_sep(buffer); *--buffer = static_cast(data::digits[index]); return end; } template constexpr int digits10() noexcept { return std::numeric_limits::digits10; } template <> constexpr int digits10() noexcept { return 38; } template <> constexpr int digits10() noexcept { return 38; } template inline Iterator format_decimal(Iterator out, UInt value, int num_digits, F add_thousands_sep) { FMT_ASSERT(num_digits >= 0, "invalid digit count"); // Buffer should be large enough to hold all digits (<= digits10 + 1). enum { max_size = digits10() + 1 }; Char buffer[2 * max_size]; auto end = format_decimal(buffer, value, num_digits, add_thousands_sep); return internal::copy_str(buffer, end, out); } template inline It format_decimal(It out, UInt value, int num_digits) { return format_decimal(out, value, num_digits, [](Char*) {}); } template inline Char* format_uint(Char* buffer, UInt value, int num_digits, bool upper = false) { buffer += num_digits; Char* end = buffer; do { const char* digits = upper ? "0123456789ABCDEF" : data::hex_digits; unsigned digit = (value & ((1 << BASE_BITS) - 1)); *--buffer = static_cast(BASE_BITS < 4 ? 
static_cast('0' + digit) : digits[digit]); } while ((value >>= BASE_BITS) != 0); return end; } template Char* format_uint(Char* buffer, internal::fallback_uintptr n, int num_digits, bool = false) { auto char_digits = std::numeric_limits::digits / 4; int start = (num_digits + char_digits - 1) / char_digits - 1; if (int start_digits = num_digits % char_digits) { unsigned value = n.value[start--]; buffer = format_uint(buffer, value, start_digits); } for (; start >= 0; --start) { unsigned value = n.value[start]; buffer += char_digits; auto p = buffer; for (int i = 0; i < char_digits; ++i) { unsigned digit = (value & ((1 << BASE_BITS) - 1)); *--p = static_cast(data::hex_digits[digit]); value >>= BASE_BITS; } } return buffer; } template inline It format_uint(It out, UInt value, int num_digits, bool upper = false) { // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). char buffer[num_bits() / BASE_BITS + 1]; format_uint(buffer, value, num_digits, upper); return internal::copy_str(buffer, buffer + num_digits, out); } template struct null {}; // Workaround an array initialization issue in gcc 4.8. template struct fill_t { private: Char data_[6]; public: FMT_CONSTEXPR Char& operator[](size_t index) { return data_[index]; } FMT_CONSTEXPR const Char& operator[](size_t index) const { return data_[index]; } static FMT_CONSTEXPR fill_t make() { auto fill = fill_t(); fill[0] = Char(' '); return fill; } }; } // namespace internal // We cannot use enum classes as bit fields because of a gcc bug // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414. namespace align { enum type { none, left, right, center, numeric }; } using align_t = align::type; namespace sign { enum type { none, minus, plus, space }; } using sign_t = sign::type; // Format specifiers for built-in and string types. template struct basic_format_specs { int width; int precision; char type; align_t align : 4; sign_t sign : 3; bool alt : 1; // Alternate form ('#'). 
internal::fill_t fill; char thousands; constexpr basic_format_specs() : width(0), precision(-1), type(0), align(align::none), sign(sign::none), alt(false), fill(internal::fill_t::make()), thousands('\0'){} }; using format_specs = basic_format_specs; namespace internal { // A floating-point presentation format. enum class float_format : unsigned char { general, // General: exponent notation or fixed point based on magnitude. exp, // Exponent notation with the default precision of 6, e.g. 1.2e-3. fixed, // Fixed point with the default precision of 6, e.g. 0.0012. hex }; struct float_specs { int precision; float_format format : 8; sign_t sign : 8; bool upper : 1; bool locale : 1; bool percent : 1; bool binary32 : 1; bool use_grisu : 1; bool trailing_zeros : 1; }; // Writes the exponent exp in the form "[+-]d{2,3}" to buffer. template It write_exponent(int exp, It it) { FMT_ASSERT(-10000 < exp && exp < 10000, "exponent out of range"); if (exp < 0) { *it++ = static_cast('-'); exp = -exp; } else { *it++ = static_cast('+'); } if (exp >= 100) { const char* top = data::digits + (exp / 100) * 2; if (exp >= 1000) *it++ = static_cast(top[0]); *it++ = static_cast(top[1]); exp %= 100; } const char* d = data::digits + exp * 2; *it++ = static_cast(d[0]); *it++ = static_cast(d[1]); return it; } template class float_writer { private: // The number is given as v = digits_ * pow(10, exp_). const char* digits_; int num_digits_; int exp_; size_t size_; float_specs specs_; Char decimal_point_; template It prettify(It it) const { // pow(10, full_exp - 1) <= v <= pow(10, full_exp). int full_exp = num_digits_ + exp_; if (specs_.format == float_format::exp) { // Insert a decimal point after the first digit and add an exponent. 
*it++ = static_cast(*digits_); int num_zeros = specs_.precision - num_digits_; bool trailing_zeros = num_zeros > 0 && specs_.trailing_zeros; if (num_digits_ > 1 || trailing_zeros) *it++ = decimal_point_; it = copy_str(digits_ + 1, digits_ + num_digits_, it); if (trailing_zeros) it = std::fill_n(it, num_zeros, static_cast('0')); *it++ = static_cast(specs_.upper ? 'E' : 'e'); return write_exponent(full_exp - 1, it); } if (num_digits_ <= full_exp) { // 1234e7 -> 12340000000[.0+] it = copy_str(digits_, digits_ + num_digits_, it); it = std::fill_n(it, full_exp - num_digits_, static_cast('0')); if (specs_.trailing_zeros) { *it++ = decimal_point_; int num_zeros = specs_.precision - full_exp; if (num_zeros <= 0) { if (specs_.format != float_format::fixed) *it++ = static_cast('0'); return it; } #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION if (num_zeros > 1000) throw std::runtime_error("fuzz mode - avoiding excessive cpu use"); #endif it = std::fill_n(it, num_zeros, static_cast('0')); } } else if (full_exp > 0) { // 1234e-2 -> 12.34[0+] it = copy_str(digits_, digits_ + full_exp, it); if (!specs_.trailing_zeros) { // Remove trailing zeros. int num_digits = num_digits_; while (num_digits > full_exp && digits_[num_digits - 1] == '0') --num_digits; if (num_digits != full_exp) *it++ = decimal_point_; return copy_str(digits_ + full_exp, digits_ + num_digits, it); } *it++ = decimal_point_; it = copy_str(digits_ + full_exp, digits_ + num_digits_, it); if (specs_.precision > num_digits_) { // Add trailing zeros. 
int num_zeros = specs_.precision - num_digits_; it = std::fill_n(it, num_zeros, static_cast('0')); } } else { // 1234e-6 -> 0.001234 *it++ = static_cast('0'); int num_zeros = -full_exp; if (specs_.precision >= 0 && specs_.precision < num_zeros) num_zeros = specs_.precision; int num_digits = num_digits_; if (!specs_.trailing_zeros) while (num_digits > 0 && digits_[num_digits - 1] == '0') --num_digits; if (num_zeros != 0 || num_digits != 0) { *it++ = decimal_point_; it = std::fill_n(it, num_zeros, static_cast('0')); it = copy_str(digits_, digits_ + num_digits, it); } } return it; } public: float_writer(const char* digits, int num_digits, int exp, float_specs specs, Char decimal_point) : digits_(digits), num_digits_(num_digits), exp_(exp), specs_(specs), decimal_point_(decimal_point) { int full_exp = num_digits + exp - 1; int precision = specs.precision > 0 ? specs.precision : 16; if (specs_.format == float_format::general && !(full_exp >= -4 && full_exp < precision)) { specs_.format = float_format::exp; } size_ = prettify(counting_iterator()).count(); size_ += specs.sign ? 1 : 0; } size_t size() const { return size_; } size_t width() const { return size(); } template void operator()(It&& it) { if (specs_.sign) *it++ = static_cast(data::signs[specs_.sign]); it = prettify(it); } }; template int format_float(T value, int precision, float_specs specs, buffer& buf); // Formats a floating-point number with snprintf. 
template int snprintf_float(T value, int precision, float_specs specs, buffer& buf); template T promote_float(T value) { return value; } inline double promote_float(float value) { return value; } template FMT_CONSTEXPR void handle_int_type_spec(const Spec& specs, Handler&& handler) { if (specs.thousands != '\0') { handler.on_num(); return; } switch (specs.type) { case 0: case 'd': handler.on_dec(); break; case 'x': case 'X': handler.on_hex(); break; case 'b': case 'B': handler.on_bin(); break; case 'o': handler.on_oct(); break; case 'n': case 'l': case 'L': handler.on_num(); break; default: handler.on_error("Invalid type specifier \"" + std::string(1, specs.type) + "\" for formatting a value of type int"); } } template FMT_CONSTEXPR float_specs parse_float_type_spec( const basic_format_specs& specs, ErrorHandler&& eh = {}) { auto result = float_specs(); if (specs.thousands != '\0') { eh.on_error("Thousand separators are not supported for floating point numbers"); return result; } result.trailing_zeros = specs.alt; switch (specs.type) { case 0: result.format = float_format::general; result.trailing_zeros |= specs.precision != 0; break; case 'G': result.upper = true; FMT_FALLTHROUGH; case 'g': result.format = float_format::general; break; case 'E': result.upper = true; FMT_FALLTHROUGH; case 'e': result.format = float_format::exp; result.trailing_zeros |= specs.precision != 0; break; case 'F': result.upper = true; FMT_FALLTHROUGH; case 'f': result.format = float_format::fixed; result.trailing_zeros |= specs.precision != 0; break; #if FMT_DEPRECATED_PERCENT case '%': result.format = float_format::fixed; result.percent = true; break; #endif case 'A': result.upper = true; FMT_FALLTHROUGH; case 'a': result.format = float_format::hex; break; case 'n': case 'l': case 'L': result.locale = true; break; default: eh.on_error("Invalid type specifier \"" + std::string(1, specs.type) + "\" for formatting a value of type float"); break; } return result; } template FMT_CONSTEXPR 
void handle_char_specs(const basic_format_specs* specs, Handler&& handler) { if (!specs) return handler.on_char(); if (specs->type && specs->type != 'c') return handler.on_int(); if (specs->align == align::numeric || specs->sign != sign::none || specs->alt) handler.on_error("invalid format specifier for char"); handler.on_char(); } template FMT_CONSTEXPR void handle_cstring_type_spec(Char spec, Handler&& handler) { if (spec == 0 || spec == 's') handler.on_string(); else if (spec == 'p') handler.on_pointer(); else handler.on_error("Invalid type specifier \"" + std::string(1, spec) + "\" for formatting a value of type string"); } template FMT_CONSTEXPR void check_string_type_spec(Char spec, ErrorHandler&& eh) { if (spec != 0 && spec != 's') eh.on_error("Invalid type specifier \"" + std::string(1, spec) + "\" for formatting a value of type string"); } template FMT_CONSTEXPR void check_pointer_type_spec(Char spec, ErrorHandler&& eh) { if (spec != 0 && spec != 'p') eh.on_error("Invalid type specifier \"" + std::string(1, spec) + "\" for formatting a value of type pointer"); } template class int_type_checker : private ErrorHandler { public: FMT_CONSTEXPR explicit int_type_checker(ErrorHandler eh) : ErrorHandler(eh) {} FMT_CONSTEXPR void on_dec() {} FMT_CONSTEXPR void on_hex() {} FMT_CONSTEXPR void on_bin() {} FMT_CONSTEXPR void on_oct() {} FMT_CONSTEXPR void on_num() {} FMT_CONSTEXPR void on_error(std::string error) { ErrorHandler::on_error(error); } }; template class char_specs_checker : public ErrorHandler { private: char type_; public: FMT_CONSTEXPR char_specs_checker(char type, ErrorHandler eh) : ErrorHandler(eh), type_(type) {} FMT_CONSTEXPR void on_int() { handle_int_type_spec(type_, int_type_checker(*this)); } FMT_CONSTEXPR void on_char() {} }; template class cstring_type_checker : public ErrorHandler { public: FMT_CONSTEXPR explicit cstring_type_checker(ErrorHandler eh) : ErrorHandler(eh) {} FMT_CONSTEXPR void on_string() {} FMT_CONSTEXPR void on_pointer() {} }; 
template void arg_map::init(const basic_format_args& args) { if (map_) return; map_ = new entry[internal::to_unsigned(args.max_size())]; if (args.is_packed()) { for (int i = 0;; ++i) { internal::type arg_type = args.type(i); if (arg_type == internal::none_type) return; if (arg_type == internal::named_arg_type) push_back(args.values_[i]); } } for (int i = 0, n = args.max_size(); i < n; ++i) { auto type = args.args_[i].type_; if (type == internal::named_arg_type) push_back(args.args_[i].value_); } } template struct nonfinite_writer { sign_t sign; const char* str; static constexpr size_t str_size = 3; size_t size() const { return str_size + (sign ? 1 : 0); } size_t width() const { return size(); } template void operator()(It&& it) const { if (sign) *it++ = static_cast(data::signs[sign]); it = copy_str(str, str + str_size, it); } }; // This template provides operations for formatting and writing data into a // character range. template class basic_writer { public: using char_type = typename Range::value_type; using iterator = typename Range::iterator; using format_specs = basic_format_specs; private: iterator out_; // Output iterator. locale_ref locale_; // Attempts to reserve space for n extra characters in the output range. // Returns a pointer to the reserved range or a reference to out_. auto reserve(std::size_t n) -> decltype(internal::reserve(out_, n)) { return internal::reserve(out_, n); } template struct padded_int_writer { size_t size_; string_view prefix; char_type fill; std::size_t padding; F f; size_t size() const { return size_; } size_t width() const { return size_; } template void operator()(It&& it) const { if (prefix.size() != 0) it = copy_str(prefix.begin(), prefix.end(), it); it = std::fill_n(it, padding, fill); f(it); } }; // Writes an integer in the format // // where are written by f(it). 
// NOTE(review): several template parameter/argument lists appear to be missing
// in this text (e.g. `template void ...`, `static_cast('0')`, `max_value()`).
// Verify against the upstream sources before relying on it compiling as-is.

// Writes an integer made of `num_digits` digits preceded by `prefix`.
//   num_digits - number of digits that `f` is expected to emit.
//   prefix     - sign and/or base prefix written before the digits.
//   specs      - format specs (taken by value; the alignment may be adjusted).
//   f          - digit-writing callable, forwarded to padded_int_writer.
// The total size and the amount of padding are derived from `specs`, and the
// actual output is delegated to write_padded.
template void write_int(int num_digits, string_view prefix, format_specs specs, F f) {
  std::size_t size = prefix.size() + to_unsigned(num_digits);
  char_type fill = specs.fill[0];
  std::size_t padding = 0;
  if (specs.align == align::numeric) {
    // Numeric alignment: the padding is accounted for inside the integer
    // writer, using the requested width and the fill character from specs.
    auto unsiged_width = to_unsigned(specs.width);
    if (unsiged_width > size) {
      padding = unsiged_width - size;
      size = unsiged_width;
    }
  } else if (specs.precision > num_digits) {
    // Precision larger than the digit count: pad the digits with '0' up to
    // the requested precision.
    size = prefix.size() + to_unsigned(specs.precision);
    padding = to_unsigned(specs.precision - num_digits);
    fill = static_cast('0');
  }
  // Integers without an explicit alignment are right-aligned.
  if (specs.align == align::none) specs.align = align::right;
  write_padded(specs, padded_int_writer{size, prefix, fill, padding, f});
}

// Writes a decimal integer.
// No format specs are involved: an optional '-' followed by the decimal
// digits of the absolute value is written.
template void write_decimal(Int value) {
  auto abs_value = static_cast>(value);
  bool negative = is_negative(value);
  // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer.
  if (negative) abs_value = ~abs_value + 1;
  int num_digits = count_digits(abs_value);
  // Reserve room for the digits plus one extra character for the sign.
  auto&& it = reserve((negative ? 1 : 0) + static_cast(num_digits));
  if (negative) *it++ = static_cast('-');
  it = format_decimal(it, abs_value, num_digits);
}

// The handle_int_type_spec handler that writes an integer.
// Each on_*() member handles one presentation type: it computes the number of
// digits, extends the prefix if needed, and calls basic_writer::write_int with
// a small functor that emits the digits.
template struct int_writer {
  using unsigned_type = uint32_or_64_or_128_t;

  basic_writer& writer;     // Destination writer.
  const Specs& specs;       // Format specs for this argument.
  unsigned_type abs_value;  // Absolute value of the integer being written.
  char prefix[4];           // Sign and base-prefix characters.
  unsigned prefix_size;     // Number of valid characters in `prefix`.

  // Returns a view over the prefix characters accumulated so far.
  string_view get_prefix() const { return string_view(prefix, prefix_size); }

  // Stores the absolute value of `value` and initializes the prefix with the
  // sign character: '-' for negative values, otherwise '+' or ' ' when the
  // specs request a sign other than none/minus.
  int_writer(basic_writer& w, Int value, const Specs& s)
      : writer(w), specs(s), abs_value(static_cast(value)), prefix_size(0) {
    if (is_negative(value)) {
      prefix[0] = '-';
      ++prefix_size;
      // Negate in the unsigned domain.
      abs_value = 0 - abs_value;
    } else if (specs.sign != sign::none && specs.sign != sign::minus) {
      prefix[0] = specs.sign == sign::plus ? '+' : ' ';
      ++prefix_size;
    }
  }

  // Emits `num_digits` decimal digits of `abs_value`.
  struct dec_writer {
    unsigned_type abs_value;
    int num_digits;

    template void operator()(It&& it) const {
      it = internal::format_decimal(it, abs_value, num_digits);
    }
  };

  // Decimal presentation.
  void on_dec() {
    int num_digits = count_digits(abs_value);
    writer.write_int(num_digits, get_prefix(), specs, dec_writer{abs_value, num_digits});
  }

  // Emits `num_digits` base-16 digits (4 bits per digit) of self.abs_value.
  struct hex_writer {
    int_writer& self;
    int num_digits;

    template void operator()(It&& it) const {
      // The last argument is true for any type other than 'x'; presumably it
      // selects upper-case digits -- confirm against format_uint.
      it = format_uint<4, char_type>(it, self.abs_value, num_digits, self.specs.type != 'x');
    }
  };

  // Hexadecimal presentation; with the alternate form the prefix gets '0'
  // followed by the type character from the specs.
  void on_hex() {
    if (specs.alt) {
      prefix[prefix_size++] = '0';
      prefix[prefix_size++] = specs.type;
    }
    int num_digits = count_digits<4>(abs_value);
    writer.write_int(num_digits, get_prefix(), specs, hex_writer{*this, num_digits});
  }

  // Emits `num_digits` digits of `abs_value` via format_uint; used below for
  // binary (bin_writer<1>) and octal (bin_writer<3>) output.
  template struct bin_writer {
    unsigned_type abs_value;
    int num_digits;

    template void operator()(It&& it) const {
      it = format_uint(it, abs_value, num_digits);
    }
  };

  // Binary presentation; with the alternate form the prefix gets '0' followed
  // by the type character from the specs.
  void on_bin() {
    if (specs.alt) {
      prefix[prefix_size++] = '0';
      prefix[prefix_size++] = static_cast(specs.type);
    }
    int num_digits = count_digits<1>(abs_value);
    writer.write_int(num_digits, get_prefix(), specs, bin_writer<1>{abs_value, num_digits});
  }

  // Octal presentation.
  void on_oct() {
    int num_digits = count_digits<3>(abs_value);
    if (specs.alt && specs.precision <= num_digits && abs_value != 0) {
      // Octal prefix '0' is counted as a digit, so only add it if precision
      // is not greater than the number of digits.
      prefix[prefix_size++] = '0';
    }
    writer.write_int(num_digits, get_prefix(), specs, bin_writer<3>{abs_value, num_digits});
  }

  // Length of the digit-group separator, in characters.
  enum { sep_size = 1 };

  // Emits decimal digits with the separator `sep` inserted between digit
  // groups. `size` is the total output length including separators, and
  // `groups` holds the group sizes starting from the least significant group.
  struct num_writer {
    unsigned_type abs_value;
    int size;
    const std::string& groups;
    char_type sep;

    template void operator()(It&& it) const {
      basic_string_view s(&sep, sep_size);
      // Index of a decimal digit with the least significant digit having
      // index 0.
      int digit_index = 0;
      std::string::const_iterator group = groups.cbegin();
      // The callback is handed to format_decimal (presumably invoked once per
      // digit written -- confirm) and inserts a separator whenever the current
      // group has been filled.
      it = format_decimal(
          it, abs_value, size,
          [this, s, &group, &digit_index](char_type*& buffer) {
            // Nothing to do for a non-positive or "unlimited" group size, or
            // while the current group is not yet complete.
            if (*group <= 0 || ++digit_index % *group != 0 || *group == max_value())
              return;
            // Advance to the next group size if there is one; otherwise the
            // last group size keeps being reused.
            if (group + 1 != groups.cend()) {
              digit_index = 0;
              ++group;
            }
            // Step the buffer pointer back and place the separator there.
            buffer -= s.size();
            std::uninitialized_copy(s.data(), s.data() + s.size(), make_checked(buffer, s.size()));
          });
    }
  };

  // Presentation with digit grouping. Falls back to on_dec() when the
  // grouping obtained from the writer's locale is empty or when no thousands
  // separator is set in the specs.
  void on_num() {
    std::string groups = grouping(writer.locale_);
    if (groups.empty()) return on_dec();
    auto sep = specs.thousands;
    if (!sep) return on_dec();
    int num_digits = count_digits(abs_value);
    int size = num_digits;
    std::string::const_iterator group = groups.cbegin();
    // Add one separator for every listed group that is fully followed by more
    // digits.
    while (group != groups.cend() && num_digits > *group && *group > 0 && *group != max_value()) {
      size += sep_size;
      num_digits -= *group;
      ++group;
    }
    // All listed groups consumed: the last group size repeats for the
    // remaining digits.
    if (group == groups.cend()) size += sep_size * ((num_digits - 1) / groups.back());
    writer.write_int(size, get_prefix(), specs, num_writer{abs_value, size, groups, static_cast(sep)});
  }

  // Reports an invalid type specifier by throwing duckdb::Exception (through
  // FMT_THROW).
  FMT_NORETURN void on_error(std::string error) { FMT_THROW(duckdb::Exception(error)); }
};

// Functor for write_padded that copies `size_` code units starting at `s`.
template struct str_writer {
  const Char* s;
  size_t size_;

  // Number of code units to be written.
  size_t size() const { return size_; }
  // Display width, computed as the number of code points in the string.
  size_t width() const { return count_code_points(basic_string_view(s, size_)); }

  template void operator()(It&& it) const {
    it = copy_str(s, s + size_, it);
  }
};

// Functor for write_padded that writes "0x" followed by `num_digits`
// hexadecimal digits of `value`.
template struct pointer_writer {
  UIntPtr value;
  int num_digits;

  // The digits plus the two characters of the "0x" prefix.
  size_t size() const { return to_unsigned(num_digits) + 2; }
  size_t width() const { return size(); }

  template void operator()(It&& it) const {
    *it++ = static_cast('0');
    *it++ = static_cast('x');
    it = format_uint<4, char_type>(it, value, num_digits);
  }
};

public:
// Constructs a writer that outputs through `out.begin()` and uses `loc` for
// locale-dependent formatting.
explicit basic_writer(Range out, locale_ref loc = locale_ref()) : out_(out.begin()), locale_(loc) {}

// Returns the current output iterator.
iterator out() const { return out_; }

// Writes a value in the format
//   <left-padding><value><right-padding>
// where <value> is written by f(it).
// NOTE(review): several template parameter/argument lists appear to be missing
// in this text (e.g. `template void ...`, `template ::value)>`,
// `static_cast(value)`). Verify against the upstream sources before relying on
// it compiling as-is.

// Writes the output of `f` padded to specs.width with specs.fill[0].
// `f` must provide size() (code units), width() (code points) and
// operator()(iterator). When the requested width does not exceed the content
// width, the content is written without any padding.
template void write_padded(const format_specs& specs, F&& f) {
  // User-perceived width (in code points).
  unsigned width = to_unsigned(specs.width);
  size_t size = f.size();  // The number of code units.
  // f.width() is only consulted when a width was actually requested.
  size_t num_code_points = width != 0 ? f.width() : size;
  if (width <= num_code_points) return f(reserve(size));
  // Reserve the padded width plus the surplus of code units over code points.
  auto&& it = reserve(width + (size - num_code_points));
  char_type fill = specs.fill[0];
  std::size_t padding = width - num_code_points;
  if (specs.align == align::right) {
    it = std::fill_n(it, padding, fill);
    f(it);
  } else if (specs.align == align::center) {
    // An odd amount of padding puts the extra fill character on the right.
    std::size_t left_padding = padding / 2;
    it = std::fill_n(it, left_padding, fill);
    f(it);
    it = std::fill_n(it, padding - left_padding, fill);
  } else {
    // Every other alignment value pads on the right of the content.
    f(it);
    it = std::fill_n(it, padding, fill);
  }
}

// Unformatted integer output: each overload forwards to write_decimal.
void write(int value) { write_decimal(value); }
void write(long value) { write_decimal(value); }
void write(long long value) { write_decimal(value); }

void write(unsigned value) { write_decimal(value); }
void write(unsigned long value) { write_decimal(value); }
void write(unsigned long long value) { write_decimal(value); }

#if FMT_USE_INT128
void write(int128_t value) { write_decimal(value); }
void write(uint128_t value) { write_decimal(value); }
#endif

// Writes an integer according to `spec` by dispatching on the type specifier
// through handle_int_type_spec with an int_writer as the handler.
template void write_int(T value, const Spec& spec) {
  handle_int_type_spec(spec, int_writer(*this, value, spec));
}

// Writes a floating-point value according to `specs`.
// Handles the sign, non-finite values, numeric alignment, hexadecimal
// formatting (via snprintf_float) and the general case (via format_float and
// float_writer).
template ::value)> void write(T value, format_specs specs = {}) {
  float_specs fspecs = parse_float_type_spec(specs);
  fspecs.sign = specs.sign;
  if (std::signbit(value)) {  // value < 0 is false for NaN so use signbit.
    fspecs.sign = sign::minus;
    value = -value;
  } else if (fspecs.sign == sign::minus) {
    // A "minus" sign request produces no sign character for values without
    // the sign bit set.
    fspecs.sign = sign::none;
  }

  if (!std::isfinite(value)) {
    // Infinity and NaN are written as fixed strings, upper-cased on request.
    auto str = std::isinf(value) ? (fspecs.upper ? "INF" : "inf") : (fspecs.upper ? "NAN" : "nan");
    return write_padded(specs, nonfinite_writer{fspecs.sign, str});
  }

  if (specs.align == align::none) {
    specs.align = align::right;
  } else if (specs.align == align::numeric) {
    // Numeric alignment: emit the sign first so that the padding ends up
    // between the sign and the digits, and shrink the width accordingly.
    if (fspecs.sign) {
      auto&& it = reserve(1);
      *it++ = static_cast(data::signs[fspecs.sign]);
      fspecs.sign = sign::none;
      if (specs.width != 0) --specs.width;
    }
    specs.align = align::right;
  }

  memory_buffer buffer;
  if (fspecs.format == float_format::hex) {
    // Hexadecimal floats are produced by snprintf_float and written as a
    // plain padded string.
    if (fspecs.sign) buffer.push_back(data::signs[fspecs.sign]);
    snprintf_float(promote_float(value), specs.precision, fspecs, buffer);
    write_padded(specs, str_writer{buffer.data(), buffer.size()});
    return;
  }
  // Use a precision of 6 when a type specifier is present but no precision
  // was given; otherwise keep the precision from the specs.
  int precision = specs.precision >= 0 || !specs.type ? specs.precision : 6;
  if (fspecs.format == float_format::exp) ++precision;
  if (const_check(std::is_same())) fspecs.binary32 = true;
  fspecs.use_grisu = use_grisu();
  if (const_check(FMT_DEPRECATED_PERCENT) && fspecs.percent) value *= 100;
  int exp = format_float(promote_float(value), precision, fspecs, buffer);
  if (const_check(FMT_DEPRECATED_PERCENT) && fspecs.percent) {
    buffer.push_back('%');
    --exp;  // Adjust decimal place position.
  }
  fspecs.precision = precision;
  // The decimal point comes from the locale only when locale-aware formatting
  // was requested.
  char_type point = fspecs.locale ? decimal_point(locale_) : static_cast('.');
  write_padded(specs, float_writer(buffer.data(), static_cast(buffer.size()), exp, fspecs, point));
}

// Writes a single character without padding.
void write(char value) {
  auto&& it = reserve(1);
  *it++ = value;
}

// Writes a single character of a constrained character type without padding.
template ::value)> void write(Char value) {
  auto&& it = reserve(1);
  *it++ = value;
}

// Writes a narrow string without padding.
void write(string_view value) {
  auto&& it = reserve(value.size());
  it = copy_str(value.begin(), value.end(), it);
}
// Writes a wide string without padding; guarded by a static_assert on the
// character type.
void write(wstring_view value) {
  static_assert(std::is_same::value, "");
  auto&& it = reserve(value.size());
  it = std::copy(value.begin(), value.end(), it);
}

// Writes `size` code units starting at `s`, padded according to `specs`.
template void write(const Char* s, std::size_t size, const format_specs& specs) {
  write_padded(specs, str_writer{s, size});
}

// Writes a string view according to `specs`. A non-negative precision smaller
// than the string size truncates the output at the code unit index returned
// by code_point_index for that precision.
template void write(basic_string_view s, const format_specs& specs = {}) {
  const Char* data = s.data();
  std::size_t size = s.size();
  if (specs.precision >= 0 && to_unsigned(specs.precision) < size)
    size = code_point_index(s, to_unsigned(specs.precision));
  write(data, size, specs);
}

// Writes a pointer value as "0x" followed by hexadecimal digits. `specs` may
// be null, in which case no padding is applied; otherwise a copy of the specs
// is used with right alignment as the default.
template void write_pointer(UIntPtr value, const format_specs* specs) {
  int num_digits = count_digits<4>(value);
  auto pw = pointer_writer{value, num_digits};
  if (!specs) return pw(reserve(to_unsigned(num_digits) + 2));
  format_specs specs_copy = *specs;
  if (specs_copy.align == align::none) specs_copy.align = align::right;
  write_padded(specs_copy, pw);
}
};

using writer = basic_writer>;

// Integral-type trait with two explicit specializations that yield true_type.
// NOTE(review): the specialized types are not visible in this text --
// presumably the 128-bit integer types; confirm.
template struct is_integral : std::is_integral {};
template <> struct is_integral : std::true_type {};
template <> struct is_integral : std::true_type {};

// Base class of argument formatters: a visitor whose operator() overloads
// write one format argument each, honoring the optional format specs.
template class arg_formatter_base {
 public:
  using char_type = typename Range::value_type;
  using iterator = typename Range::iterator;
  using format_specs = basic_format_specs;

 private:
  using writer_type = basic_writer;
  writer_type writer_;
  format_specs* specs_;  // May be null: no specs were given for the argument.

  // Functor for write_padded that writes a single character.
  struct char_writer {
    char_type value;

    size_t size() const { return 1; }
    size_t width() const { return 1; }

    template void operator()(It&& it) const { *it++ = value; }
  };

  // Writes a character, padded if specs are present.
  void write_char(char_type value) {
    if (specs_)
      writer_.write_padded(*specs_, char_writer{value});
    else
      writer_.write(value);
  }

  // Writes a pointer as an integer obtained through internal::to_uintptr.
  void write_pointer(const void* p) {
    writer_.write_pointer(internal::to_uintptr(p), specs_);
  }

 protected:
  writer_type& writer() { return writer_; }
  // Deprecated accessor; returns the same pointer as specs().
  FMT_DEPRECATED format_specs* spec() { return specs_; }
  format_specs* specs() { return specs_; }
  iterator out() { return writer_.out(); }

  // Writes a bool as the text "true" or "false".
  void write(bool value) {
    string_view sv(value ? "true" : "false");
    specs_ ? writer_.write(sv, *specs_) : writer_.write(sv);
  }

  // Writes a null-terminated string; throws duckdb::Exception (through
  // FMT_THROW) if the pointer is null.
  void write(const char_type* value) {
    if (!value) {
      FMT_THROW(duckdb::Exception("string pointer is null"));
    } else {
      auto length = std::char_traits::length(value);
      basic_string_view sv(value, length);
      specs_ ? writer_.write(sv, *specs_) : writer_.write(sv);
    }
  }

 public:
  // `s` may be null when the argument has no format specs.
  arg_formatter_base(Range r, format_specs* s, locale_ref loc) : writer_(r, loc), specs_(s) {}

  // An empty argument is a logic error: asserts and writes nothing.
  iterator operator()(monostate) {
    FMT_ASSERT(false, "invalid argument type");
    return out();
  }

  // Integer arguments.
  template ::value)> iterator operator()(T value) {
    if (specs_)
      writer_.write_int(value, *specs_);
    else
      writer_.write(value);
    return out();
  }

  // Character arguments: char_spec_handler chooses between integer and
  // character output.
  iterator operator()(char_type value) {
    internal::handle_char_specs(
        specs_, char_spec_handler(*this, static_cast(value)));
    return out();
  }

  // Bool arguments: written as the integer 1 or 0 when a type specifier is
  // present, otherwise as text.
  iterator operator()(bool value) {
    if (specs_ && specs_->type) return (*this)(value ? 1 : 0);
    write(value != 0);
    return out();
  }

  // Floating-point arguments; default-constructed specs are used when none
  // were given.
  template ::value)> iterator operator()(T value) {
    writer_.write(value, specs_ ? *specs_ : format_specs());
    return out();
  }

  // Handler for handle_char_specs: on_int() writes the character as an
  // integer, on_char() writes it as a character.
  struct char_spec_handler : ErrorHandler {
    arg_formatter_base& formatter;
    char_type value;

    char_spec_handler(arg_formatter_base& f, char_type val) : formatter(f), value(val) {}

    void on_int() {
      if (formatter.specs_)
        formatter.writer_.write_int(value, *formatter.specs_);
      else
        formatter.writer_.write(value);
    }
    void on_char() { formatter.write_char(value); }
  };

  // Handler for handle_cstring_type_spec: on_string() writes the C string's
  // contents, on_pointer() writes its address.
  struct cstring_spec_handler : internal::error_handler {
    arg_formatter_base& formatter;
    const char_type* value;

    cstring_spec_handler(arg_formatter_base& f, const char_type* val) : formatter(f), value(val) {}

    void on_string() { formatter.write(value); }
    void on_pointer() { formatter.write_pointer(value); }
  };

  // C string arguments; without specs the string is written directly.
  iterator operator()(const char_type* value) {
    if (!specs_) return write(value), out();
    internal::handle_cstring_type_spec(specs_->type, cstring_spec_handler(*this, value));
    return out();
  }

  // String view arguments; the type specifier is validated when specs exist.
  iterator operator()(basic_string_view value) {
    if (specs_) {
      internal::check_string_type_spec(specs_->type, internal::error_handler());
      writer_.write(value, *specs_);
    } else {
      writer_.write(value);
    }
    return out();
  }

  // Pointer arguments; the type specifier is validated when specs exist.
  iterator operator()(const void* value) {
    if (specs_) check_pointer_type_spec(specs_->type, internal::error_handler());
    write_pointer(value);
    return out();
  }
};

// Returns true if `c` is an ASCII letter or an underscore.
template FMT_CONSTEXPR bool is_name_start(Char c) {
  return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c;
}

// Parses the range [begin, end) as an unsigned integer. This function assumes
// that the range is non-empty and the first character is a digit.
// On return `begin` points past the consumed digits. A leading '0' is
// consumed on its own and yields 0. Values above `max_int` are reported
// through eh.on_error("number is too big").
template FMT_CONSTEXPR int parse_nonnegative_int(const Char*& begin, const Char* end, ErrorHandler&& eh) {
  FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', "");
  if (*begin == '0') {
    ++begin;
    return 0;
  }
  unsigned value = 0;
  // Convert to unsigned to prevent a warning.
  constexpr unsigned max_int = max_value();
  unsigned big = max_int / 10;
  do {
    // Check for overflow.
    if (value > big) {
      // Sentinel above max_int so that the check after the loop fires.
      value = max_int + 1;
      break;
    }
    value = value * 10 + unsigned(*begin - '0');
    ++begin;
  } while (begin != end && '0' <= *begin && *begin <= '9');
  if (value > max_int) eh.on_error("number is too big");
  return static_cast(value);
}

// Visitor that formats arguments held as a custom `handle` and reports
// through its return value whether it handled the argument.
template class custom_formatter {
 private:
  using char_type = typename Context::char_type;

  basic_format_parse_context& parse_ctx_;
  Context& ctx_;

 public:
  explicit custom_formatter(basic_format_parse_context& parse_ctx, Context& ctx)
      : parse_ctx_(parse_ctx), ctx_(ctx) {}

  // Custom argument: delegates to the handle and returns true.
  bool operator()(typename basic_format_arg::handle h) const {
    h.format(parse_ctx_, ctx_);
    return true;
  }

  // Any other argument type: not handled here.
  template bool operator()(T) const { return false; }
};

// Compile-time predicate combining an integral check with three std::is_same
// exclusions.
// NOTE(review): the excluded types are not visible in this text -- presumably
// bool and the character types; confirm.
template using is_integer = bool_constant::value && !std::is_same::value && !std::is_same::value && !std::is_same::value>;

// Visitor that validates a dynamic width argument: integer arguments must be
// non-negative; any other argument type is reported as an error.
template class width_checker {
 public:
  explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {}

  template ::value)> FMT_CONSTEXPR unsigned long long operator()(T value) {
    if (is_negative(value)) handler_.on_error("negative width");
    return static_cast(value);
  }

  template ::value)> FMT_CONSTEXPR unsigned long long operator()(T) {
    handler_.on_error("width is not integer");
    return 0;
  }

 private:
  ErrorHandler& handler_;
};

// Visitor that validates a dynamic precision argument: integer arguments must
// be non-negative; any other argument type is reported as an error.
template class precision_checker {
 public:
  explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {}

  template ::value)> FMT_CONSTEXPR unsigned long long operator()(T value) {
    if (is_negative(value)) handler_.on_error("negative precision");
    return static_cast(value);
  }

  template ::value)> FMT_CONSTEXPR unsigned long long operator()(T) {
    handler_.on_error("precision is not integer");
    return 0;
  }

 private:
  ErrorHandler& handler_;
};

// A format specifier handler that sets fields in basic_format_specs.
// NOTE(review): several template parameter/argument lists appear to be missing
// in this text (e.g. `template class ...`, `static_cast(type)`). Verify
// against the upstream sources before relying on it compiling as-is.

// Format-spec parsing handler: every on_*() callback stores the parsed piece
// of the specification into the referenced basic_format_specs object.
template class specs_setter {
 public:
  explicit FMT_CONSTEXPR specs_setter(basic_format_specs& specs) : specs_(specs) {}

  // Copies refer to the same underlying specs object.
  FMT_CONSTEXPR specs_setter(const specs_setter& other) : specs_(other.specs_) {}

  FMT_CONSTEXPR void on_align(align_t align) { specs_.align = align; }
  FMT_CONSTEXPR void on_fill(Char fill) { specs_.fill[0] = fill; }
  FMT_CONSTEXPR void on_plus() { specs_.sign = sign::plus; }
  FMT_CONSTEXPR void on_minus() { specs_.sign = sign::minus; }
  FMT_CONSTEXPR void on_space() { specs_.sign = sign::space; }
  // Thousands-separator selectors: each stores the separator character.
  FMT_CONSTEXPR void on_comma() { specs_.thousands = ','; }
  FMT_CONSTEXPR void on_underscore() { specs_.thousands = '_'; }
  FMT_CONSTEXPR void on_single_quote() { specs_.thousands = '\''; }
  FMT_CONSTEXPR void on_thousands(char sep) { specs_.thousands = sep; }
  FMT_CONSTEXPR void on_hash() { specs_.alt = true; }

  // The zero flag selects numeric alignment with '0' as the fill character.
  FMT_CONSTEXPR void on_zero() {
    specs_.align = align::numeric;
    specs_.fill[0] = Char('0');
  }

  FMT_CONSTEXPR void on_width(int width) { specs_.width = width; }
  FMT_CONSTEXPR void on_precision(int precision) { specs_.precision = precision; }
  // Nothing to do at the end of the precision in the plain setter.
  FMT_CONSTEXPR void end_precision() {}

  FMT_CONSTEXPR void on_type(Char type) { specs_.type = static_cast(type); }

 protected:
  basic_format_specs& specs_;
};

// Checks that numeric-only format specifiers are used with a suitable
// argument type; violations are reported through the error handler.
template class numeric_specs_checker {
 public:
  FMT_CONSTEXPR numeric_specs_checker(ErrorHandler& eh, internal::type arg_type)
      : error_handler_(eh), arg_type_(arg_type) {}

  // Reports an error unless the argument type is arithmetic.
  FMT_CONSTEXPR void require_numeric_argument() {
    if (!is_arithmetic_type(arg_type_))
      error_handler_.on_error("format specifier requires numeric argument");
  }

  // Reports an error for non-numeric arguments and for integral types other
  // than int, long long and char.
  FMT_CONSTEXPR void check_sign() {
    require_numeric_argument();
    if (is_integral_type(arg_type_) && arg_type_ != int_type && arg_type_ != long_long_type &&
        arg_type_ != internal::char_type) {
      error_handler_.on_error("format specifier requires signed argument");
    }
  }

  // Reports an error if a precision is given for an integral or pointer
  // argument.
  FMT_CONSTEXPR void check_precision() {
    if (is_integral_type(arg_type_) || arg_type_ == internal::pointer_type)
      error_handler_.on_error("precision not allowed for this argument type");
  }

 private:
  ErrorHandler& error_handler_;
  internal::type arg_type_;
};

// A format specifier handler that checks if specifiers are consistent with the
// argument type.
// Each overridden callback runs the corresponding check first and then
// forwards to the wrapped Handler.
template class specs_checker : public Handler {
 public:
  FMT_CONSTEXPR specs_checker(const Handler& handler, internal::type arg_type)
      : Handler(handler), checker_(*this, arg_type) {}

  // NOTE(review): no `arg_type_` member is declared in specs_checker in the
  // visible code, and numeric_specs_checker::arg_type_ is private, so
  // `other.arg_type_` only resolves if Handler exposes such a member --
  // confirm before instantiating this copy constructor.
  FMT_CONSTEXPR specs_checker(const specs_checker& other)
      : Handler(other), checker_(*this, other.arg_type_) {}

  // Numeric alignment is only valid for numeric arguments.
  FMT_CONSTEXPR void on_align(align_t align) {
    if (align == align::numeric) checker_.require_numeric_argument();
    Handler::on_align(align);
  }

  FMT_CONSTEXPR void on_plus() {
    checker_.check_sign();
    Handler::on_plus();
  }

  FMT_CONSTEXPR void on_minus() {
    checker_.check_sign();
    Handler::on_minus();
  }

  FMT_CONSTEXPR void on_space() {
    checker_.check_sign();
    Handler::on_space();
  }

  FMT_CONSTEXPR void on_hash() {
    checker_.require_numeric_argument();
    Handler::on_hash();
  }

  FMT_CONSTEXPR void on_zero() {
    checker_.require_numeric_argument();
    Handler::on_zero();
  }

  // Only validates; this override does not forward to Handler::end_precision.
  FMT_CONSTEXPR void end_precision() { checker_.check_precision(); }

 private:
  numeric_specs_checker checker_;
};

template