#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2021-11-08 05:46:23 +0200 (Mon, 08 Nov 2021) $
#$Revision: 8927 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v3.4.0/scripts/cif_ddlm_dic_check $
#------------------------------------------------------------------------------
#*
#* Check DDLm dictionaries against a set of best practice rules.
#*
#* USAGE:
#*    $0 --options cif_core.dic
#*
#* ENVIRONMENT:
#*   COD_TOOLS_DDLM_IMPORT_PATH
#*                     A list of directories in which to look for the
#*                     DDLm-compliant CIF dictionaries that are imported
#*                     by other DDLm-compliant CIF dictionaries. List
#*                     elements are separated by the colon symbol (':').
#*                     Directories listed in COD_TOOLS_DDLM_IMPORT_PATH
#*                     have a lower priority than those provided using
#*                     the command line option (--add-dictionary-import-path),
#*                     but higher than the default import path directory
#*                     (directory of the importing dictionary).
#**

use strict;
use warnings;
binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

use File::Basename qw( fileparse );
use List::MoreUtils qw( any uniq );

use COD::CIF::Parser qw( parse_cif );
use COD::CIF::DDL::DDLm qw( build_ddlm_dic
                            get_all_data_names
                            get_type_container
                            get_type_contents
                            get_type_purpose
                            get_category_id
                            get_definition_class
                            get_definition_scope
                            get_dictionary_class
                            get_data_name
                            get_data_alias );
use COD::CIF::DDL::DDLm::Import qw( get_ddlm_import_path_from_env
                                    resolve_dic_imports );
use COD::CIF::DDL::Ranges qw( parse_range
                              range_to_string
                              is_in_range );
use COD::CIF::Tags::Manage qw( get_item_loop_index
                               has_special_value );
use COD::SOptions qw( getOptions get_value );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_parser_messages
                          process_warnings );
use COD::ToolsVersion qw( get_version_string );

##
# Checks if there is one and only one head category.
#
# @param $dic_data_block
#       Dictionary data block as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_head_category
{
    my ( $dic_data_block ) = @_;

    my @note;

    my @head_categories;
    for my $save_frame ( @{$dic_data_block->{'save_blocks'}} ) {
        if ( uc get_definition_class( $save_frame ) eq 'HEAD' &&
             uc get_definition_scope( $save_frame ) eq 'CATEGORY' ) {
            push @head_categories, $save_frame;
        }
    };

    if ( !@head_categories ) {
        push @note, 'the mandatory HEAD save frame is missing';
    };

    if ( @head_categories > 1 ) {
        push @note,
             'more than one HEAD save frame located -- save frames [' .
             ( join ', ', map { "'$_->{'name'}'" } @head_categories ) .
             '] are marked as having the \'HEAD\' definition class';
    };

    return \@note;
}

##
# Checks the consistency of the DICTIONARY_AUDIT loop and the related
# dictionary metadata items. The subroutine checks that:
#   - Version values are of the correct format.
#   - DICTIONARY_AUDIT loop is sorted in ascending order by
#     the version number.
#   - Changes to the current dictionary version are described
#     in the DICTIONARY_AUDIT loop.
#   - Dictionary revision date matches the date in
#     the DICTIONARY_AUDIT loop packet.
#   - Dictionary version number matches the latest version
#     number in the DICTIONARY_AUDIT loop.
#
# @param $dic_data_block
#       Dictionary data block as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_dictionary_audit_loop
{
    my ( $dic_data_block ) = @_;

    my $dic_values = $dic_data_block->{'values'};

    return [] if !defined $dic_values->{'_dictionary_audit.version'};

    my @notes;
    my %audit_packet = %{extract_audit_packets($dic_data_block)};
    for my $version (sort keys %audit_packet) {
        for my $issue (@{$audit_packet{$version}->{'parsed_version'}{'issues'}}) {
            $issue = 'attribute \'_dictionary_audit.version\' ' . $issue;
            push @notes, $issue;
        }
    }

    # Check audit entry order
    for my $i (0..($#{$dic_values->{'_dictionary_audit.version'}} - 1)) {
        my $curr_ver_string = $dic_values->{'_dictionary_audit.version'}[$i];
        my $curr_ver = get_parsed_version($audit_packet{$curr_ver_string});
        next if !defined $curr_ver;
        my $next_ver_string = $dic_values->{'_dictionary_audit.version'}[$i+1];
        my $next_ver = get_parsed_version($audit_packet{$next_ver_string});
        next if !defined $next_ver;
        if ( compare_versions( $curr_ver, $next_ver ) > 0 ) {
            push @notes,
                 'attribute \'_dictionary_audit.version\' value ' .
                 "'$curr_ver_string' appears before value '$next_ver_string' " .
                 '-- packets of the DICTIONARY_AUDIT loop should be sorted ' .
                 'in ascending order by the version number';
            last;
        }
    }

    return \@notes if !defined $dic_values->{'_dictionary.version'};

    my $dic_version_string = $dic_values->{'_dictionary.version'}[0];

    # Check if the version number is registered at all
    if (!exists $audit_packet{$dic_version_string}) {
        push @notes,
             'changes to the current version of the dictionary are not ' .
             'described in the DICTIONARY_AUDIT loop -- attribute ' .
             "'_dictionary.version' value '$dic_version_string' was not " .
             'found among the values of the \'_dictionary_audit.version\' ' .
             'attribute';
    } else {
        if (defined $audit_packet{$dic_version_string}->{'date'} &&
            defined $dic_values->{'_dictionary.date'} &&
            ($audit_packet{$dic_version_string}->{'date'} ne
             $dic_values->{'_dictionary.date'}[0] ) ) {
                push @notes,
                     'dictionary revision date specified using ' .
                     'the \'_dictionary.date\' attribute does not match ' .
                     'the date in the DICTIONARY_AUDIT loop packet for ' .
                     "version '$dic_version_string' " .
                     "('$dic_values->{'_dictionary.date'}[0]' vs. " .
                     "'$audit_packet{$dic_version_string}->{'date'}')";
        }
    }

    my $dic_version = parse_and_check_version_string($dic_version_string);
    for my $issue (@{$dic_version->{'issues'}}) {
        $issue = 'attribute \'_dictionary.version\' ' . $issue;
        push @notes, $issue;
    }
    # Check that the declared dictionary version is the latest one
    if (defined $dic_version->{'components'}) {
        my $latest_version = $dic_version->{'components'};
        my $latest_version_string = $dic_version_string;
        for my $version_string (sort keys %audit_packet) {
            my $audit_version = get_parsed_version($audit_packet{$version_string});
            next if !defined $audit_version;
            if (compare_versions($latest_version, $audit_version) < 0) {
                $latest_version = $audit_version;
                $latest_version_string = $version_string
            }
        }
        if ($dic_version_string ne $latest_version_string) {
            push @notes,
                 'dictionary version number does not match the latest ' .
                 'version number in the DICTIONARY_AUDIT loop ' .
                 "('$dic_version_string' vs. '$latest_version_string')";
        }
    }

    return \@notes;
}

##
# Extracts and prepares the DICTIONARY_AUDIT loop packet information
# for further processing.
#
# @param $dic_data_block
#       Dictionary data block as returned by the COD::CIF::Parser.
# @return
#       Reference to a data structure of the following form:
#       {
#         # Data item _dictionary_audit.version values
#         # (version number strings) serve as the keys.
#           '0.1.0' => {
#             # Parsed version number as returned by
#             # the parse_and_check_version_string()
#             # subroutine.
#               'parsed_version' => {
#                   # May be undefined
#                   'components' => {
#                       'major' => '0',
#                       'minor' => '1',
#                       'patch' => '0',
#                       'prerelease' => undef,
#                       'build' => undef,
#                   },
#                   'issues' => [
#                       # ...
#                   ]
#               },
#             # Value of the _dictionary_audit.version data
#             # item (revision date). May be undefined.
#               'date' => 2021-07-25,
#           },
#           # ...
#           '0.2' => {
#             # Parsed version number as returned by
#             # the parse_version_string() subroutine.
#             # Undefined if the string could not be
#             # parsed successfully.
#               'parsed_version' => undef,
#               'date' => 2021-07-26,
#           },
#       }
##
sub extract_audit_packets
{
    my ($dic_data_block) = @_;

    my %audit_packets;
    my $dic_values = $dic_data_block->{'values'};
    for my $i (0..$#{$dic_values->{'_dictionary_audit.version'}}) {
        my $version = $dic_values->{'_dictionary_audit.version'}[$i];
        $audit_packets{$version}{'parsed_version'} =
                                parse_and_check_version_string($version);
        next if !defined $dic_values->{'_dictionary_audit.date'};
        $audit_packets{$version}{'date'} =
                                $dic_values->{'_dictionary_audit.date'}[$i];
    }

    return \%audit_packets;
}

##
# Parses the DDLm dictionary version string as a SemVer 2.0 [1] string.
#
# @source [1]
#       https://semver.org/spec/v2.0.0.html
#
# @param $version_string
#       Version string that should be parsed.
# @return
#       Reference to a data structure of the following form:
#       {
#         # Major version number
#           'major' => 4,
#         # Minor version number
#           'minor' => 3,
#         # Patch version number
#           'patch' => 2
#         # Pre-release identifiers captured as single string.
#         # May be undefined.
#           'prerelease' => 'dev-0.pre-7'
#         # Build metadata identifiers captured as single string.
#         # May be undefined.
#           'build' => 'build-2000-01-01'
#       }
#
#       or undef value if the version string could not be parsed.
##
sub parse_version_string
{
    my ($version_string) = @_;

    my $version_components;
    if ($version_string =~ m/^([0-9]+)[.]
                              ([0-9]+)[.]
                              ([0-9]+)
                              (-([0-9A-Za-z-.]+))?
                              ([+]([0-9A-Za-z-.]+))?$/x) {
        $version_components = { 'major' => $1,
                                'minor' => $2,
                                'patch' => $3,
                                'prerealease' => $5,
                                'build' => $7 };
    }

    return $version_components;
}

##
# Parses the DDLm dictionary version string as a SemVer 2.0 [1] string
# and runs some additional validity checks on the parsed value.
#
# @source [1]
#       https://semver.org/spec/v2.0.0.html
#
# @param $version_string
#       Version string that should be parsed.
# @return
#       Reference to a data structure of the following form:
#       {
#         # Reference to a data structure of a parsed version number
#         # as returned by the parse_version_string() subroutine.
#         # May be undefined.
#           'components' => {
#           # Major version number
#             'major' => 4,
#           # Minor version number
#             'minor' => 3,
#           # Patch version number
#             'patch' => 2
#           # Pre-release identifiers captured as single string.
#           # May be undefined.
#             'prerelease' => 'dev-0.pre-7'
#           # Build metadata identifiers captured as single string.
#           # May be undefined.
#             'build' => 'build-2000-01-01'
#           },
#         # Reference to an array of parsing issue messages
#           'issues' => [
#               'value '1.1.1.1' could not be parsed as a valid ...'
#               # ...
#            ]
#       }
##
sub parse_and_check_version_string
{
    my ($version_string) = @_;

    my $version_components = parse_version_string($version_string);
    my @issues;
    if (defined $version_components) {
        push @issues, @{check_parsed_version($version_components)};
    } else {
        push @issues,
             "value '$version_string' could not be parsed as a valid " .
             'semantic version string -- the version string should consist ' .
             'of three numbers separated by dots with optional pre-release ' .
             'identifiers, i.e. \'1.234.56\', \'4.7.8-dev-1\''
    }
    my $parsed_version = {
        'components' => $version_components,
        'issues'     => \@issues,
    };

    return $parsed_version;
}

##
# Construct a version string from individual version components.
#
# @param $version
#       Data structure that contains the version components as
#       returned by the parse_version_string() subroutine.
# @return $version_string
#       Constructed version string.
##
sub build_version_string
{
    my ($version) = @_;

    my $version_string = join '.', map { $version->{$_} } qw(major minor patch);
    if (defined $version->{'prerelease'}) {
        $version_string .= '-' . $version->{'prerelease'};
    }
    if (defined $version->{'build'}) {
        $version_string .= '+' . $version->{'build'};
    }

    return $version_string;
}

##
# Checks the validity of the parsed version string from an entry of
# the AUDIT_LOOP
#
# @param $parsed_version
#       Reference to a data structure of a parsed version number
#       as returned by the parse_version_string() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub check_parsed_version
{
    my ($parsed_version) = @_;

    my @notes;
    my %stripped_version;
    for my $type ( keys %{$parsed_version} ) {
        next if !defined $parsed_version->{$type};
        $stripped_version{$type} = $parsed_version->{$type};
        $stripped_version{$type} =~ s/^0+([0-9])/$1/;
    }
    my $old_version_string = build_version_string($parsed_version);
    my $new_version_string = build_version_string(\%stripped_version);

    if ($old_version_string ne $new_version_string) {
        push @notes,
             "value '$old_version_string' should instead be written as " .
             "'$new_version_string' -- version numbers must not contain " .
             'leading zeros';
    }

    return \@notes;
}

##
# Retrieves the data structure of the parsed version string from
# the extracted DICTIONARY_AUDIT loop packets.
#
# @param $version
#       Reference to an extracted DICTIONARY_AUDIT loop packet as
#       returned by the extract_audit_packets() subroutine.
# @return
#       Reference to the data structure of the parsed version string
#       or undef if the string was is not defined in the input data
#       structure.
##
sub get_parsed_version
{
    my ($version) = @_;

    return if !defined $version->{'parsed_version'}{'components'};

    return $version->{'parsed_version'}{'components'};
}

##
# Compares two parsed DDLm version numbers as if they were SemVer 2.0 [1]
# strings.
#
# @source [1]
#       https://semver.org/spec/v2.0.0.html
#
# @param $version_a
#       Data structure of the first parsed version number as
#       returned by the parse_version_string() subroutine.
# @param $version_b
#       Data structure of the first parsed version number as
#       returned by the parse_version_string() subroutine.
# @return
#        1 if $version_a > $version_b
#        0 if $version_a = $version_b
#       -1 if $version_a < $version_b
##
sub compare_versions
{
    my ($version_a, $version_b) = @_;

    ## no critic (ProhibitMagicNumbers)
    return -1 if ($version_a->{'major'} < $version_b->{'major'});
    return  1 if ($version_a->{'major'} > $version_b->{'major'});
    return -1 if ($version_a->{'minor'} < $version_b->{'minor'});
    return  1 if ($version_a->{'minor'} > $version_b->{'minor'});
    return -1 if ($version_a->{'patch'} < $version_b->{'patch'});
    return  1 if ($version_a->{'patch'} > $version_b->{'patch'});

    if ( defined $version_a->{'prerelease'} &&
         defined $version_b->{'prerelease'} ) {
        return $version_a->{'prerelease'} cmp $version_b->{'prerelease'};
    }
    if ( defined $version_a->{'prerelease'} &&
        !defined $version_b->{'prerelease'} ) {
        return -1;
    }
    if ( defined $version_b->{'prerelease'} &&
        !defined $version_a->{'prerelease'} ) {
        return 1;
    }
    ## use critic

    return 0;
}

##
# Checks if all of the provided save frames have a unique save frame code.
#
# @source [1]
#       2.2.7.1.4. General features,
#       "International Tables for Crystallography Volume G:
#        Definition and exchange of crystallographic data",
#       2005, 25-26, paragraph (6), doi: 10.1107/97809553602060000107
#
# @param $save_frames
#       Reference to an array of save frames as returned by
#       the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_save_frame_code_uniqueness
{
    my ( $save_frames ) = @_;

    my %code_frequency;
    for my $save_frame ( @{$save_frames} ) {
        my $frame_code = $save_frame->{'name'};
        push @{$code_frequency{lc $frame_code}}, $frame_code;
    }

    my @notes;
    for my $frame_code ( sort keys %code_frequency ) {
        my $count = @{$code_frequency{$frame_code}};
        next if $count < 2;
        push @notes,
             "save frame code is not unique -- save frame 'save_$frame_code' " .
             "appears $count times as [" .
             ( join ', ', map {"'$_'"} @{$code_frequency{$frame_code}} ) .
             ']';
    }

    return \@notes;
}

##
# Checks if all data names given in the provided data blocks are unique.
#
# @param $save_frames
#       Reference to an array of save frames as returned by
#       the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_data_name_uniqueness
{
    my ( $save_frames ) = @_;

    my @notes;
    my %data_name_to_frame_codes;
    for my $save_frame ( @{$save_frames} ) {
        my @data_names = map {lc} @{get_all_data_names($save_frame)};
        next if !@data_names;

        for my $data_name ( sort { $a cmp $b } uniq @data_names ) {
            push @{$data_name_to_frame_codes{$data_name}}, $save_frame->{'name'};
        }
    }

    for my $data_name ( sort keys %data_name_to_frame_codes ) {
        my $frame_codes = $data_name_to_frame_codes{$data_name};
        next if @{$frame_codes} < 2;
        push @notes,
             "data name is not unique -- data name '$data_name' is defined " .
             'by save frames [' .
             ( join ', ', map { "'save_$_'"} @{$frame_codes} )
             . ']';
    }

    return \@notes;
}

##
# Checks if the provided category ids can be located in the dictionary.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_data_block
#       Dictionary data block as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_category_ids
{
    my ($save_frame, $dic_data_block) = @_;

    return [] if !defined get_category_id($save_frame);
    my $category_name = uc get_category_id($save_frame);

    my @notes;
    if ( uc get_definition_class( $save_frame ) eq 'HEAD' &&
         uc get_definition_scope( $save_frame ) eq 'CATEGORY' ) {
        if ( $category_name ne uc $dic_data_block->{'values'}{'_dictionary.title'}[0] ) {
            push @notes,
                 'value of the \'_name.category_id\' attribute ' .
                 'in the \'HEAD\' save frame must match the value ' .
                 'of the \'_dictionary.title\' attribute';
        }
    } else {
        my $category_found = 0;
        foreach ( @{$dic_data_block->{'save_blocks'}} ) {
            next if uc get_definition_scope($_) ne 'CATEGORY';
            next if !defined get_data_name($_);
            if ( uc get_data_name($_) eq $category_name ) {
                $category_found = 1;
                last;
            }
        }

        if (!$category_found) {
            push @notes,
                 "the '$category_name' category could not be located";
        }
    }

    return \@notes;
}

##
# Checks the redundancy of the data item aliases.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_aliases
{
    my ( $save_frame ) = @_;

    return [] if !defined get_data_name( $save_frame );
    my $definition_id = uc get_data_name( $save_frame );

    my @validation_messages;
    for my $alias ( @{get_data_alias($save_frame)} ) {
        if ( $definition_id eq uc $alias ) {
            push @validation_messages,
                 'the \'_alias.definition_id\' attribute value ' .
                 "'$alias' matches the '_definition.id' attribute value -- " .
                 'the alias should be removed';
        }
    }

    return \@validation_messages;
}

##
# Checks the usage of the '_enumeration_default.value' attribute.
#
# The subroutine checks that:
#   - Attribute '_enumeration.def_index_id' is also provided in the definition.
#   - In case the '_enumeration_set.state' attribute is also provided,
#     values of the '_enumeration_default.value' attribute are compatible
#     with the values of the '_enumeration_set.state' attribute.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_enumeration_default
{
    my ($save_frame) = @_;

    my @notes;
    my $ENUM_DEFAULT_TAG = '_enumeration_default.value';
    return \@notes if !defined $save_frame->{'values'}{$ENUM_DEFAULT_TAG};

    if ( !defined $save_frame->{'values'}{'_enumeration.def_index_id'} ) {
        push @notes,
             'incomplete data item definition -- attribute ' .
             '\'_enumeration.def_index_id\' must appear in definitions ' .
             "that contain the '$ENUM_DEFAULT_TAG' attribute";
    }

    my $ENUM_VALUE_TAG = '_enumeration_set.state';
    return \@notes if !defined $save_frame->{'values'}{$ENUM_VALUE_TAG};
    my %enum_states;
    for my $i (0..$#{$save_frame->{'values'}{$ENUM_VALUE_TAG}}) {
        next if has_special_value( $save_frame, $ENUM_VALUE_TAG, $i );
        $enum_states{ $save_frame->{'values'}{$ENUM_VALUE_TAG}[$i] } = 1;
    }

    my @unenumerated_defaults;
    for my $i (0..$#{$save_frame->{'values'}{$ENUM_DEFAULT_TAG}}) {
        next if has_special_value( $save_frame, $ENUM_DEFAULT_TAG, $i );
        my $default_value = $save_frame->{'values'}{$ENUM_DEFAULT_TAG}[$i];
        if ( !defined $enum_states{$default_value} ) {
            push @unenumerated_defaults, $default_value
        }
    }
    @unenumerated_defaults = uniq(@unenumerated_defaults);

    for my $unenumerated_default ( uniq(@unenumerated_defaults) ) {
        push @notes,
             "attribute '$ENUM_DEFAULT_TAG' value '$unenumerated_default' " .
             "is not one of the '$ENUM_VALUE_TAG' attribute values -- " .
             'default enumeration values must belong to the enumeration ' .
             'value set'
    }

    return \@notes;
}

##
# Checks the usage of the '_enumeration.def_index_id' attribute.
#
# The subroutine checks that:
#   - Data item referenced using the attribute is defined in the dictionary.
#   - Definition does not reference itself using the attribute.
#   - Attribute '_enumeration_default.value' is also provided in the definition.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_struct
#       Dictionary search structure as returned by the
#       COD::CIF::DDL::DDLm::build_ddlm_dic() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub check_attribute_enumeration_def_index_id
{
    my ($save_frame, $dic_struct) = @_;

    my @notes;
    my $REF_ATTRIBUTE = '_enumeration.def_index_id';
    return \@notes if !defined $save_frame->{'values'}{$REF_ATTRIBUTE};

    my $ref_item_name = $save_frame->{'values'}{$REF_ATTRIBUTE}[0];
    if (!defined $dic_struct->{'Item'}{lc $ref_item_name}) {
        push @notes,
             "attribute '$REF_ATTRIBUTE' references the '$ref_item_name' " .
             'data item that is not defined in the given dictionary';
    } else {
        for my $data_name (@{get_all_data_names($save_frame)}) {
            if (lc $ref_item_name eq lc $data_name) {
                push @notes,
                     'definition references itself using the ' .
                     "'$REF_ATTRIBUTE' attribute";
                last;
            }
        }
    }

    if ( !defined $save_frame->{'values'}{'_enumeration_default.value'} ) {
        push @notes,
             'incomplete data item definition -- attribute ' .
             '\'_enumeration_default.value\' must appear in definitions ' .
             "that contain the '$REF_ATTRIBUTE' attribute";
    }

    return \@notes;
}

##
# Checks the usage of the '_definition.class' attribute.
#
# The subroutine checks that:
#   - Attribute value 'Datum' only appears in data item definitions.
#   - Attribute value 'Functions' is accompanied by the '_method.expression'
#     attribute.
#   - Attribute value 'Head', 'Loop' and 'Set' only appear in category
#     definitions.
#   - Attribute value 'Loop' is accompanied by the '_category_key.name'
#     attribute.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_definition_class
{
    my ($save_frame) = @_;

    my @notes;
    my $ATTRIBUTE = '_definition.class';
    return \@notes if !defined $save_frame->{'values'}{$ATTRIBUTE};

    my $definition_class = $save_frame->{'values'}{$ATTRIBUTE}[0];
    my $definition_class_lc = lc $definition_class;
    my $scope = lc get_definition_scope($save_frame);

    if( $definition_class_lc eq 'datum' && $scope ne 'item' ) {
        push @notes,
             "incorrect category definition -- attribute '$ATTRIBUTE' " .
             "value '$definition_class' is only compatible with " .
             'data item definitions';
    }

    if( $definition_class_lc eq 'functions' && $scope eq 'item' &&
        !defined $save_frame->{'values'}{'_method.expression'} ) {
        push @notes,
             "incomplete data item definition -- attribute '$ATTRIBUTE' " .
             "value is set to '$definition_class', but the " .
             '\'_method.expression\' attribute is not provided'
    }

    if( ( $definition_class_lc eq 'head' ||
          $definition_class_lc eq 'loop' ||
          $definition_class_lc eq 'set' ) && $scope ne 'category' ) {
        push @notes,
             "incorrect data item definition -- attribute '$ATTRIBUTE' " .
             "value '$definition_class' is only compatible with " .
             'category definitions';
    }

    if( $definition_class_lc eq 'loop' &&
        $scope eq 'category' &&
        !defined $save_frame->{'values'}{'_category_key.name'} ) {
        push @notes,
             "incomplete category definition -- attribute '$ATTRIBUTE' " .
             "value is set to '$definition_class', but the " .
             '\'_category_key.name\' attribute is not provided'
    }

    return \@notes;
}

##
# Checks if the enumeration ranges specified explicitly do not
# contradict enumeration ranges imposed by content type.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_struct
#       Dictionary search structure as returned by the
#       COD::CIF::DDL::DDLm::build_ddlm_dic() subroutine.
# @param $options
#       Reference to an option hash. The following options are
#       recognised:
#       {
#           # specifies if warnings should be issued in cases
#           # when the explicit range limits match those imposed
#           # by the content type
#           'report_redundant_range_limits' => 0
#       }
# @return
#       Array reference to a list of validation messages.
##
sub check_enumeration_range
{
    my ($save_frame, $dic_struct, $options) = @_;

    return [] if !defined get_data_name( $save_frame );
    return [] if !defined $save_frame->{'values'}{'_enumeration.range'};

    my @validation_messages;

    my $type = lc get_type_contents(
        lc get_data_name( $save_frame ),
        $save_frame,
        $dic_struct
    );

    my $range = $save_frame->{'values'}{'_enumeration.range'}[0];
    my $item_range = parse_range($range);
    my $type_range = get_enum_range_from_type($type);
    if ( !is_subrange( $type_range, $item_range,
                       { 'type' => 'numb' } ) ) {
        push @validation_messages,
             'the declared enumeration range ' .
             range_to_string( $item_range, { 'type' => 'numb' } ) .
             " violates the range imposed by the '$type' data type " .
             range_to_string( $type_range, { 'type' => 'numb' } );
    }

    if ( $options->{'report_redundant_range_limits'} ) {
        if ( defined $item_range->[0] && defined $type_range->[0] &&
            equals($item_range->[0], $type_range->[0], 5) ) {
           push @validation_messages,
                "the lower enumeration range limit '$item_range->[0]' " .
                'is needlessly specified since the same lower limit ' .
                "is imposed by the '$type' data type";
        }
    }

    return \@validation_messages;
}

##
# Checks the usage of the '_name.linked_item_id' attribute.
#
# The subroutine checks that:
#   - Data item referenced using the attribute is defined in the dictionary.
#   - Definition does not reference itself using the attribute.
#   - Attribute '_type.purpose' value is set to 'SU' or 'Link'.
#   - Data item referenced by an SU data item has the 'Measurand' purpose.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_struct
#       Dictionary search structure as returned by the
#       COD::CIF::DDL::DDLm::build_ddlm_dic() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub check_linked_items
{
    my ($save_frame, $dic_struct) = @_;

    my @notes;
    my $ref_attribute = '_name.linked_item_id';
    return \@notes if !exists $save_frame->{'values'}{$ref_attribute};

    my $ref_item_name = $save_frame->{'values'}{$ref_attribute}[0];
    if (!defined $dic_struct->{'Item'}{lc $ref_item_name}) {
        push @notes,
             "attribute '$ref_attribute' references the '$ref_item_name' " .
             'data item that is not defined in the given dictionary';
    } else {
        for my $data_name (@{get_all_data_names($save_frame)}) {
            if (lc $ref_item_name eq lc $data_name) {
                push @notes,
                     'definition references itself using the ' .
                     "'$ref_attribute' attribute";
                last;
            }
        }
    }

    my $type_purpose = lc get_type_purpose( $save_frame );
    if ( $type_purpose eq 'su' ) {
        my $linked_item_name = lc $save_frame->{'values'}{$ref_attribute}[0];
        if ( defined $dic_struct->{'Item'}{$linked_item_name} ) {
            my $linked_item = $dic_struct->{'Item'}{$linked_item_name};
            if ( lc get_type_purpose($linked_item) ne 'measurand' ) {
                push @notes,
                     'data item is defined as having the \'SU\' purpose, ' .
                     "however, it is linked to the '$linked_item_name' " .
                     'data item that has the ' .
                     '\'' . get_type_purpose($linked_item) . '\' ' .
                     'purpose and does not allow standard uncertainties';
            }
        }
    }

    if ( $type_purpose ne 'su' && $type_purpose ne 'link' ) {
        push @notes,
             'incorrect type purpose -- data item is defined as having ' .
             "the '$type_purpose' type purpose while only " .
             '\'SU\' and \'Link\' type purposes are allowed for data ' .
             'items that contain the \'_name.linked_item_id\' attribute in ' .
             'their definition';
    }

    return \@notes;
}

##
# Checks the usage of the '_category_key.name' attribute.
#
# The subroutine checks that:
#   - Attribute '_definition.class' value is set to 'Loop'.
#   - Data items referenced using the attribute are defined in the dictionary.
#   - Referenced data items belong to the same category as the defining item.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_struct
#       Dictionary search structure as returned by the
#       COD::CIF::DDL::DDLm::build_ddlm_dic() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub check_category_key_name
{
    my ($save_frame, $dic_struct) = @_;

    my @notes;
    my $REF_ATTRIBUTE = '_category_key.name';
    return \@notes if !defined $save_frame->{'values'}{$REF_ATTRIBUTE};

    my $definition_class = get_definition_class($save_frame);
    if( uc $definition_class ne 'LOOP' ) {
        push @notes,
             "attribute '$REF_ATTRIBUTE' is not compatible with the " .
             "'$definition_class' definition class -- the only compatible " .
             '\'_definition.class\' attribute value is \'Loop\'';
    }

    for my $ref_item_name (@{$save_frame->{'values'}{$REF_ATTRIBUTE}}) {
        if (!defined $dic_struct->{'Item'}{lc $ref_item_name}) {
            push @notes,
                 "attribute '$REF_ATTRIBUTE' references the '$ref_item_name' " .
                 'data item that is not defined in the given dictionary';
        } else {
            my $ref_item = $dic_struct->{'Item'}{lc $ref_item_name};
            my $definition_name = get_data_name($save_frame);
            my $category_name = get_category_id($ref_item);
            if (lc $definition_name ne lc $category_name) {
                push @notes,
                  "data item '$REF_ATTRIBUTE' is referenced as the category " .
                  'key, however, it does not belong to the given category ' .
                  '(\'' . (uc $category_name) . ' instead of \'' .
                  (uc $definition_name) . '\')';
            }
        }
    }

    return \@notes;
}

##
# Checks the usage of the '_type.contents_referenced_id' attribute.
#
# The subroutine checks that:
#   - Data item referenced using the attribute is defined in the dictionary.
#   - Definition does not reference itself using the attribute.
#   - Attribute '_type.contents' value is set to 'ByReference' [1,2].
#
# @source [1]
#       ddl.dic DDLm reference dictionary version 4.1.0,
#       definition of the '_type.contents_referenced_id' attribute.
# @source [2]
#       https://github.com/COMCIFS/cif_core/blob/5eed5425867dd7bf9cc0f3a4ccf52f01390e7190/ddl.dic#L1720
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_struct
#       Dictionary search structure as returned by the
#       COD::CIF::DDL::DDLm::build_ddlm_dic() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub check_attribute_type_contents_referenced_id
{
    my ($save_frame, $dic_struct) = @_;

    my @notes;
    my $REF_ATTRIBUTE = '_type.contents_referenced_id';
    return \@notes if !defined $save_frame->{'values'}{$REF_ATTRIBUTE};

    my $ref_item_name = $save_frame->{'values'}{$REF_ATTRIBUTE}[0];
    if (!defined $dic_struct->{'Item'}{$ref_item_name}) {
        push @notes,
             "attribute '$REF_ATTRIBUTE' references the '$ref_item_name' " .
             'data item that is not defined in the given dictionary';
    } else {
        for my $data_name (@{get_all_data_names($save_frame)}) {
            if (lc $ref_item_name eq lc $data_name) {
                push @notes,
                     'definition references itself using the ' .
                     "'$REF_ATTRIBUTE' attribute";
                last;
            }
        }
    }

    my $type = lc get_type_contents(
                    lc get_data_name( $save_frame ),
                    $save_frame,
                    $dic_struct,
                    {
                      'resolve_implied_type' => 0,
                      'resolve_byreference_type' => 0,
                    }
                  );
    if ($type ne 'byreference') {
        push @notes,
             "data item contains the '$REF_ATTRIBUTE' attribute, but " .
             'the \'_type.contents\' attribute value is not set to ' .
             '\'ByReference\'';
    }

    return \@notes;
}

##
# Checks the usage of the '_type.indices_referenced_id' attribute.
#
# The subroutine checks that:
#   - Data item referenced using the attribute is defined in the dictionary.
#   - Definition does not reference itself using the attribute.
#   - Attribute '_type.container' value is set to 'Table' [1,2].
#   - Attribute '_type.indices' value is set to 'ByReference' [1,2].
#
# @source [1]
#       ddl.dic DDLm reference dictionary version 4.1.0,
#       definition of the '_type.indices_referenced_id' attribute.
# @source [2]
#       https://github.com/COMCIFS/cif_core/blob/491bf77f39ef2f989b9230ea90e6345f8282a4b7/ddl.dic#L1826
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_struct
#       Dictionary search structure as returned by the
#       COD::CIF::DDL::DDLm::build_ddlm_dic() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub check_attribute_indices_referenced_id
{
    my ($save_frame, $dic_struct) = @_;

    my @notes;
    my $REF_ATTRIBUTE = '_type.indices_referenced_id';
    return \@notes if !defined $save_frame->{'values'}{$REF_ATTRIBUTE};

    my $ref_item_name = $save_frame->{'values'}{$REF_ATTRIBUTE}[0];
    if (!defined $dic_struct->{'Item'}{lc $ref_item_name}) {
        push @notes,
             "attribute '$REF_ATTRIBUTE' references the '$ref_item_name' " .
             'data item that is not defined in the given dictionary';
    } else {
        for my $data_name (@{get_all_data_names($save_frame)}) {
            if (lc $ref_item_name eq lc $data_name) {
                push @notes,
                     'definition references itself using the ' .
                     "'$REF_ATTRIBUTE' attribute";
                last;
            }
        }
    }

    my $container = get_type_container($save_frame);
    if (lc $container ne 'table') {
        push @notes,
             "definition contains the '$REF_ATTRIBUTE' attribute, but " .
             '\'the \'_type.container\' value is not set to \'Table\'';
    }

    # Set the default value of the '_type.indices' data item
    # as specified in DDLm reference dictionary, version 4.2.0 
    my $index_type = 'Text';
    if ( defined $save_frame->{'values'}{'_type.indices'} ) {
        $index_type = $save_frame->{'values'}{'_type.indices'}[0];
    }
    if (lc $index_type ne 'byreference') {
        push @notes,
             "definition contains the '$REF_ATTRIBUTE' attribute, but " .
             'the \'_type.indices\' attribute value is not set to ' .
             '\'ByReference\'';
    }

    return \@notes;
}

##
# Checks the usage of the '_definition_replaced.by' attribute.
#
# The subroutine checks that:
#   - Data items referenced using the attribute are defined in the dictionary.
#   - Definition does not reference itself using the attribute.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_struct
#       Dictionary search structure as returned by the
#       COD::CIF::DDL::DDLm::build_ddlm_dic() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub check_attribute_definition_replaced_by
{
    my ($save_frame, $dic_struct) = @_;

    my @notes;
    my $REF_ATTRIBUTE = '_definition_replaced.by';
    return \@notes if !defined $save_frame->{'values'}{$REF_ATTRIBUTE};

    for my $i (0..($#{$save_frame->{'values'}{$REF_ATTRIBUTE}})) {
        next if has_special_value( $save_frame, $REF_ATTRIBUTE, $i );
        my $ref_item_name = $save_frame->{'values'}{$REF_ATTRIBUTE}[$i];
        if (!defined $dic_struct->{'Item'}{lc $ref_item_name} ) {
            push @notes,
                 "attribute '$REF_ATTRIBUTE' references the '$ref_item_name' " .
                 'data item that is not defined in the given dictionary';
        } else {
            for my $data_name (@{get_all_data_names($save_frame)}) {
                if (lc $ref_item_name eq lc $data_name) {
                    push @notes,
                         'definition references itself using the ' .
                         "'$REF_ATTRIBUTE' attribute";
                    last;
                }
            }
        }
    }

    return \@notes;
}

##
# Checks if the data names used in the free-text description of the data
# item are defined in the dictionary. This subroutine treats all string
# that contain underscores as data item/category names thus false warnings
# might be produced.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_struct
#       Dictionary search structure as returned by the
#       COD::CIF::DDL::DDLm::build_ddlm_dic() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub check_references_in_descriptions
{
    my ( $save_frame, $dic_struct ) = @_;

    my @messages;

    return \@messages if !exists $save_frame->{'values'}{'_description.text'};

    my $description = $save_frame->{'values'}{'_description.text'}[0];
    while ( $description =~ m/([^\s]*_[^\s]*)/g ) {
        my $referenced_tag = $1;
        $referenced_tag =~ s/^[(']//;
        $referenced_tag =~ s/[\n.),']*$//;
        $referenced_tag = lc $referenced_tag;

        if ( $referenced_tag =~ m/^_/ ) {
            if (!exists $dic_struct->{'Item'}{$referenced_tag}) {
                push @messages,
                     'the save frame description seems to be ' .
                     "referencing the '$referenced_tag' data item " .
                     'which is not defined in the dictionary';
            }
        } else {
            if (!exists $dic_struct->{'Category'}{$referenced_tag}) {
                push @messages,
                     'the save frame description seems to be ' .
                     "referencing the '$referenced_tag' category " .
                     'which is not defined in the dictionary';
            }
        }
    }

    return \@messages;
}

##
# Checks if item definitions meet the requirements raised by the declared
# item purpose.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_item_purpose
{
    my ( $save_frame ) = @_;

    my @validation_messages;

    my $type_purpose = lc get_type_purpose( $save_frame );

    if ( $type_purpose eq 'link' &&
         !exists $save_frame->{'values'}{'_name.linked_item_id'} ) {
        push @validation_messages,
             'incomplete data item definition -- data item is ' .
             "defined as having the 'Link' purpose, but the " .
             "'_name.linked_item_id' attribute is not provided";
    }

    if ( $type_purpose eq 'su' &&
         !exists $save_frame->{'values'}{'_name.linked_item_id'} ) {
        push @validation_messages,
             'incomplete data item definition -- data item is ' .
             "defined as having the 'SU' purpose, but the " .
             "'_name.linked_item_id' attribute is not provided";
    }

    if ( $type_purpose eq 'state' &&
         !exists $save_frame->{'values'}{'_enumeration_set.state'} ) {
        push @validation_messages,
             'incomplete data item definition -- data item is ' .
             "defined as having the 'State' purpose, but the " .
             "'_enumeration_set.state' attribute is not provided";
    }

    return \@validation_messages;
}

##
# Checks if definitions contain a proper revision date. A proper revision
# date should follow the ISO standard date format and be no later than
# the dictionary revision date.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_date
#       Text string that contains a valid dictionary revision date
#       as specified by the '_dictionary.date' data item.
# @return
#       Array reference to a list of validation messages.
##
sub check_update_date
{
    my ($save_frame, $dic_date) = @_;

    return [] if !defined $save_frame->{'values'}{'_definition.update'};
    my $item_date = $save_frame->{'values'}{'_definition.update'}[0];

    my @notes;
    if (!looks_like_iso_date($item_date)) {
        push @notes,
             "attribute '_definition.update' value '$item_date' does not " .
             'conform to the ISO standard date format <yyyy>-<mm>-<dd>'
    } elsif ($dic_date lt $item_date) {
        # ISO dates can be compared as simple text strings
        push @notes,
             'definition revision date is later than the dictionary revision ' .
             'date -- attribute \'_definition.update\' value should not ' .
             'exceed the \'_dictionary.date\' attribute value ' .
             "('$item_date' vs. '$dic_date')";
    }

    return \@notes;
}

##
# Evaluates if a text strings looks like an ISO date of the form
# <yyyy>-<mm>-<dd>. Proper date validation is not carried out.
#
# @input $date
#       Text string that contains the date.
# @return
#       Boolean value denoting if the given string looks like an ISO date.
##
sub looks_like_iso_date
{
    my ($date) = @_;

    return $date =~ m/^[0-9]{4}-[0-9]{2}-[0-9]{2}$/;
}

##
# Checks if all measurand data item definitions have associated standard
# uncertainty (SU) data item definitions as specified in the description
# of the measurand type purpose [1,2].
#
# @source [1]
#       ddl.dic DDLm reference dictionary version 4.1.0,
#       definition of the '_type.purpose' attribute.
# @source [2]
#       https://github.com/COMCIFS/cif_core/blob/491bf77f39ef2f989b9230ea90e6345f8282a4b7/ddl.dic#L1936
#
# @param $save_frames
#       Reference to an array of save frames as returned by
#       the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_su_item_presence
{
    my ( $save_frames ) = @_;

    my @notes;

    my %measurand_items;
    my %measurand_to_su;
    for my $save_frame (@{$save_frames}) {
        my $data_name = get_data_name( $save_frame );
        next if !defined $data_name;
        $data_name = lc $data_name;
        my $type_purpose = lc get_type_purpose( $save_frame );
        if ($type_purpose eq 'measurand') {
            $measurand_items{$data_name} = $save_frame;
        } elsif ($type_purpose eq 'su') {
            next if !exists $save_frame->{'values'}{'_name.linked_item_id'};
            my $linked_item_name =
                        lc $save_frame->{'values'}{'_name.linked_item_id'}[0];
            $measurand_to_su{$linked_item_name} = $data_name;
        }
    }

    for my $measurand_name (sort keys %measurand_items) {
        next if defined $measurand_to_su{$measurand_name};
        push @notes,
             "measurand data item '$measurand_name' must have an " .
             'associated standard uncertainty data item ' .
             "(i.e. '${measurand_name}_su')"
    }

    return \@notes;
}

##
# Checks if data names follow the IUCr data item naming convention that
# applies to the standard uncertainty (SU) data items.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_su_naming_convention
{
    my ($save_frame) = @_;

    my @notes;

    my $data_name = get_data_name( $save_frame );
    return \@notes if !defined $data_name;
    $data_name = lc $data_name;

    my $type_purpose = lc get_type_purpose( $save_frame );
    if ($type_purpose ne 'su' && $data_name =~ /_su$/) {
        push @notes,
             'data item does not follow the IUCr naming convention -- ' .
             'only data names of standard uncertainty data items ' .
             'should have the \'_su\' postfix';
    };
    return \@notes if $type_purpose ne 'su';

    return \@notes if !exists $save_frame->{'values'}{'_name.linked_item_id'};
    my $linked_item_name = lc $save_frame->{'values'}{'_name.linked_item_id'}[0];


    if ($linked_item_name . '_su' ne $data_name) {
        push @notes,
             'data item does not follow the IUCr naming convention -- ' .
             'data names of standard uncertainty data items should be ' .
             'constructed by appending the \'_su\' postfix to the name of ' .
             'the associated measurand data item ' .
             "('${linked_item_name}_su' instead of '$data_name')";
    }

    return \@notes;
}

##
# Checks if the use of the type dimension attribute is compatible with
# the rest of the data item definition as specified in the description
# of the 'Dimension' content type [1,2].
#
# @source [1]
#       ddl.dic DDLm reference dictionary version 4.1.0,
#       definition of the '_type.contents' attribute.
# @source [2]
#       https://github.com/COMCIFS/cif_core/blob/491bf77f39ef2f989b9230ea90e6345f8282a4b7/ddl.dic#L1672
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_type_dimension_applicablity
{
    my ($save_frame) = @_;

    my @notes;

    return \@notes if !exists $save_frame->{'values'}{'_type.dimension'};

    my $container = get_type_container($save_frame);
    my @allowed_containers = qw( List Array Matrix );

    my $lc_container = lc $container;
    for my $lc_allowed_container (map { lc } @allowed_containers) {
        return \@notes if $lc_container eq $lc_allowed_container;
    }

    push @notes,
         'the \'_type.dimension\' attribute is not compatible with ' .
         "the '$container' container type -- compatible " .
         '\'_type.container\' attribute values include ' .
         '[' . ( join ', ', map {"'$_'"} @allowed_containers ) . ']';

    return \@notes;
}

##
# Checks if measurement units are explicitly provided in the definitions
# of quantifiable data items.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_measurement_units
{
    my ($save_frame, $dic_struct) = @_;

    return [] if !defined get_data_name( $save_frame );

    my @validation_messages;

    my $has_measurement_units = 0;
    if ( defined get_measurement_unit( $save_frame ) ) {
        $has_measurement_units = 1
    } else {
        my $methods = get_methods( $save_frame );
        if ( defined $methods ) {
            for my $i ( 0..$#{$methods->{'purpose'}} ) {
                next if lc $methods->{'purpose'}[$i] ne 'definition';
                if ( $methods->{'expression'}[$i] =~ m/(^|\s+)_units[.]code\s+=/ ) {
                    $has_measurement_units = 1;
                    last;
                }
            }
        }
    }

    my $type = lc get_type_contents(
        lc get_data_name( $save_frame ),
        $save_frame,
        $dic_struct
    );

    my @numeric_types = qw( integer real imag complex );
    if ( any { $type eq $_ } @numeric_types ) {
        if (!$has_measurement_units) {
            push @validation_messages,
                "content type '$type' should be accompanied by " .
                "the '_units.code' attribute -- it is recommended to assign " .
                'units of measurement to all data items with numeric ' .
                'content types';
        }
    } else {
        if ($has_measurement_units) {
            push @validation_messages,
                 "content type '$type' may be incompatible with the " .
                 "'_units.code' attribute -- units of measurement are " .
                 'normally assigned only to data items with numeric ' .
                 'content types';
        }
    }

    return \@validation_messages;
}

##
# Checks if measurand data items and the related SU data items are assigned
# the same units of measurement.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_data_block
#       Dictionary data block as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_measurand_su_unit_compatibility
{
    my ($save_frame, $dic_data_block) = @_;

    my @notes;

    return \@notes if !defined get_data_name( $save_frame );
    return \@notes if lc get_type_purpose( $save_frame ) ne 'su';

    my $su_unit = get_measurement_unit($save_frame);
    return \@notes if !defined $su_unit;

    return \@notes if !exists $save_frame->{'values'}{'_name.linked_item_id'};
    my $measurand_name = lc $save_frame->{'values'}{'_name.linked_item_id'}[0];
    my $measurand_item;
    for ( @{$dic_data_block->{'save_blocks'}} ) {
        if ( lc get_data_name( $_ ) eq $measurand_name ) {
            $measurand_item = $_;
            last;
        }
    }
    return \@notes if !defined $measurand_item;
    my $measurand_unit = get_measurement_unit($measurand_item);
    return \@notes if !defined $measurand_unit;

    if (lc $measurand_unit ne lc $su_unit) {
        push @notes,
             'the \'_units.code\' attribute value does not match the ' .
             'value specified in the definition of the linked ' .
             "data item '$measurand_name' ('$su_unit' vs. '$measurand_unit') " .
             '-- standard uncertainty data items should have the same ' .
             'units of measurement as the associated measurand data items';
    }

    return \@notes;
}

##
# Extracts the measurement unit code from a data item definition frame.
#
# @param $data_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @return $units
#       String containing the measurement unit code or undef value if
#       the data frame does not contain a measurement unit code.
##
sub get_measurement_unit
{
    my ( $data_frame ) = @_;

    return if !exists $data_frame->{'values'}{'_units.code'};

    my $units = lc $data_frame->{'values'}{'_units.code'}[0];

    return $units;
}

##
# Checks the compatibility of linked data items in regards
# to the _type.contents, _type.container, _enumeration.range,
# _units.code and _type.dimension attributes. Does not cover
# standard uncertainty (SU) data items.
#
# @param $save_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $dic_data_block
#       Dictionary data block as returned by the COD::CIF::Parser.
# @return
#       Array reference to a list of validation messages.
##
sub check_linked_item_compatibility
{
    my ($save_frame, $dic_data_block) = @_;

    my @notes;

    return \@notes if lc get_type_purpose( $save_frame ) eq 'su';
    return \@notes if !exists $save_frame->{'values'}{'_name.linked_item_id'};
    my $measurand_name = lc $save_frame->{'values'}{'_name.linked_item_id'}[0];

    return \@notes if !exists $save_frame->{'values'}{'_name.linked_item_id'};
    my $parent_name = lc $save_frame->{'values'}{'_name.linked_item_id'}[0];
    my $parent_item;
    for my $dic_save_frame ( @{$dic_data_block->{'save_blocks'}} ) {
        if ( lc get_data_name( $dic_save_frame ) eq $parent_name ) {
            $parent_item = $dic_save_frame;
            last;
        }
    }
    return \@notes if !defined $parent_item;

    my @comparable_attributes = qw(
        _type.contents
        _type.container
        _enumeration.range
        _units.code
        _type.dimension
    );

    for my $attribute (@comparable_attributes) {
        my $parent_attribute = get_attribute_value( $attribute, $parent_item );
        my $child_attribute  = get_attribute_value( $attribute, $save_frame );

        next if !defined $parent_attribute && !defined $child_attribute;
        if (!defined $parent_attribute) {
            push @notes,
                    'incompatible definition of the linked data item -- ' .
                    "definition of the '$parent_name' data item does not " .
                    "contain the '$attribute' attribute";
            next;
        }
        if (!defined $child_attribute) {
            push @notes,
                    'incompatible definition of the linked data item -- ' .
                    "definition of the '$parent_name' data item contains " .
                    "an extra '$attribute' attribute";
            next;
        }
        if ($parent_attribute ne $child_attribute) {
            push @notes,
                    'incompatible definition of the linked data item -- ' .
                    "attribute '$attribute' value does not match " .
                    'the value specified in the definition of ' .
                    "the linked data item '$parent_name' " .
                    "('$child_attribute' vs. '$parent_attribute')";
        }
    }

    return \@notes;
}

##
# Returns a single attribute value from a save frame.
#
# @param $attribute
#       Data name of the attribute that should be returned.
# @param $data_frame
#       Data frame as returned by the COD::CIF::Parser.
# @return
#       Value of the attribute or
#       undef if the attribute is not defined
##
sub get_attribute_value
{
    my ($attribute, $data_frame) = @_;

    if (defined $data_frame->{'values'}{$attribute}) {
        return $data_frame->{'values'}{$attribute}[0];
    }

    return;
}

##
# Extracts all methods from a data item definition frame. Malformed method
# definitions (i.e. missing data items, data items that reside in separate
# loops) are silently ignored.
#
# @param $data_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @return $methods
#       Reference to a data structure of the following form:
#       {
#           'purpose' => [
#               'Evaluation',
#               'Definition',
#           ],
#           'expression' => [
#               '_atom_type.radius_contact = _atom_type.radius_bond + 1.25',
#               '_enumeration.default =  0.',
#           ]
#       }
#
#       or undef value if the data frame does not contain any method definitions.
##
sub get_methods
{
    my ( $data_frame ) = @_;

    my @method_items = qw( _method.purpose _method.expression );
    for my $method_item ( @method_items ) {
        return if !exists $data_frame->{'values'}{$method_item}
    }
    return if !are_same_loop_items( $data_frame, \@method_items );

    my %methods;
    $methods{'purpose'}    = [ @{$data_frame->{'values'}{'_method.purpose'}} ];
    $methods{'expression'} = [ @{$data_frame->{'values'}{'_method.expression'}} ];

    return \%methods;
}

##
# Determines if all of the provided data items appear in the same loop.
# All unlooped items are treated as appearing in the same loop.
#
# @param $data_frame
#       Data item definition frame as returned by the COD::CIF::Parser.
# @param $items
#       Reference to an array of data items that should be checked.
# @return
#       '1' if all items appear in the same loop,
#       '0' otherwise.
##
sub are_same_loop_items
{
    my ( $data_frame, $items ) = @_;

    my $UNLOOPED_INDEX = -1;

    my $previous_item_loop = get_item_loop_index( $data_frame, $items->[-1] );
    $previous_item_loop = $UNLOOPED_INDEX if !defined $previous_item_loop;
    for my $i ( 0 .. ( $#{$items} - 1 ) ) {
        my $current_item_loop = get_item_loop_index( $data_frame, $items->[$i] );
        $current_item_loop = $UNLOOPED_INDEX if !defined $current_item_loop;
        return 0 if $current_item_loop ne $previous_item_loop;
        $previous_item_loop = $current_item_loop;
    }

    return 1;
}

##
# Returns range limits based on the given DDLm content type.
#
# @param $type
#       Content type.
# @return
#       Reference to an array containing the range limit values.
##
sub get_enum_range_from_type
{
    my ($type) = @_;

    $type = lc $type;
    my @range = (undef, undef);

    if ( $type eq 'count' ) {
        @range = (0, undef);
    } elsif ( $type eq 'index' ) {
        @range = (1, undef);
    }

    return \@range;
}

##
# Determines if one range is a subrange of the other.
#
# @param $range
#       Array reference to the range limits.
# @param $subrange
#       Array reference to the subrange limits.
# @param $options
#       Reference to an option hash. The following options are
#       recognised:
#       {
#           # type of the enumeration range ('numb' or 'char')
#           'type' => 'numb'
#       }
# @return
#       Reference to an array containing the range limit values.
##
sub is_subrange
{
    my ($range, $subrange, $options) = @_;

    my $is_in_lower_range = !defined $subrange->[0] ||
        is_in_range( $subrange->[0], {
                        'range' => $range,
                        'type'  => $options->{'type'}
                    } );

    my $is_in_upper_range = !defined $subrange->[1] ||
        is_in_range( $subrange->[1], {
                        'range' => $range,
                        'type'  => $options->{'type'}
                    } );

    return $is_in_lower_range && $is_in_upper_range;
}

##
# Compares two floating point numbers using given decimal point precision.
# @param $float_1
#       First floating point number.
# @param $float_2
#       Second floating point number.
# @param $float_2
#       Decimal point digit precision.
# @return
#       1 if numbers are equal, 0 otherwise.
##
sub equals
{
    my ($float_1, $float_2, $dp) = @_;
    return ( ( sprintf "%.${dp}f", $float_1 ) eq
             ( sprintf "%.${dp}f", $float_2 ) ) ? 1 : 0;
}

my $use_parser = 'c';
my @dic_import_path;
my $report_redundant_range_limits = 0;
my $check_references_in_descriptions = 0;

#* OPTIONS:
#*
#*   --report-redundant-range-limits
#*                     Report explicit range limits that match implicit
#*                     range limits imposed by the content type.
#*   --no-report-redundant-range-limits
#*                     Do not report explicit range limits that match implicit
#*                     range limits imposed by the content type (default).
#*
#*   --check-references-in-descriptions
#*                     Check if the data names referenced in the free-text
#*                     descriptions of other data items are defined in the
#*                     dictionary. This check use ad hoc code to recognise
#*                     data names and thus might produce false-negatives.
#*   --no-check-references-in-descriptions
#*                     Do not check if the data names referenced in the
#*                     free-text descriptions of other data items are defined
#*                     in the dictionary (default).
#*
#*   -I, --add-ddlm-import-path './ddlm/cod/'
#*                     Prepend an additional directory to the dictionary
#*                     import path. The dictionary import path specifies
#*                     a list of directories in which to look for files
#*                     that are imported by DDLm-compliant CIF dictionaries.
#*                     Directories provided using this option are assigned
#*                     the highest priority and are searched prior to
#*                     the directories listed in the COD_TOOLS_DDLM_IMPORT_PATH
#*                     environment variable or the default import path
#*                     (directory of the importing dictionary).
#*   --clear-ddlm-import-path
#*                     Remove all directories from the dictionary import path
#*                     that were added using the --add-ddlm-import-path option.
#*                     Neither COD_TOOLS_DDLM_IMPORT_PATH environment variable
#*                     nor the default import path is affected by this option.
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    '-I,--add-ddlm-import-path' => sub { push @dic_import_path, get_value() },
    '--clear-ddlm-import-path'  => sub { @dic_import_path = () },

    '--report-redundant-range-limits'    =>
            sub { $report_redundant_range_limits = 1 },
    '--no-report-redundant-range-limits' =>
            sub { $report_redundant_range_limits = 0 },

    '--check-references-in-descriptions' =>
            sub { $check_references_in_descriptions = 1 },
    '--no-check-references-in-descriptions' =>
            sub { $check_references_in_descriptions = 0 },

    '--options'      => sub{ options; exit },
    '--help,--usage' => sub{ usage; exit },
    '--version'      => sub { print get_version_string(), "\n"; exit }
);

my $die_on_error_level = {
    'ERROR'   => 1,
    'WARNING' => 0,
    'NOTE'    => 0
};

push @dic_import_path, @{get_ddlm_import_path_from_env()};
for my $filename ( @ARGV ) {
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    $data = $data->[0];

    local $SIG{__WARN__} = sub {
        process_warnings( {
            'message'  => @_,
            'program'  => $0,
            'filename' => $filename,
            'add_pos'  => $data->{'name'}
        }, $die_on_error_level )
    };

    my $dictionary_class = get_dictionary_class($data);
    if ( $dictionary_class ne 'Instance' &&
         $dictionary_class ne 'Reference' ) {
        warn "WARNING, dictionaries of the '$dictionary_class' dictionary " .
             'class are currently not supported -- file will be skipped' . "\n";
        next;
    }

    my ($dirs) = (fileparse($filename))[1];
    $data = resolve_dic_imports(
        $data,
        {
           'import_path'        => [ @dic_import_path, $dirs ],
           'parser_options'     => $options,
           'die_on_error_level' => $die_on_error_level,
           'importing_file'     => $filename,
        }
    );

    my $dic_struct = build_ddlm_dic(
                            $data,
                            {
                              'resolve_content_types' => 0,
                            }
                     );

    foreach ( @{check_head_category($data)} ) {
        print "$0: $filename: $_.\n";
    }

    my $block_header  = "data_$data->{'name'}";
    my $save_frames = $data->{'save_blocks'};
    my @block_warnings;
    push @block_warnings, @{check_dictionary_audit_loop($data)};
    push @block_warnings, @{check_save_frame_code_uniqueness($save_frames)};
    push @block_warnings, @{check_data_name_uniqueness($save_frames)};
    push @block_warnings, @{check_su_item_presence($save_frames)};

    my $dictionary_revision_date;
    if (defined $data->{'values'}{'_dictionary.date'}) {
        $dictionary_revision_date = $data->{'values'}{'_dictionary.date'}[0];
        if (!looks_like_iso_date($dictionary_revision_date)) {
            push @block_warnings,
                 'attribute \'_dictionary.date\' value ' .
                 "'$dictionary_revision_date' does not conform to the ISO " .
                 'standard date format <yyyy>-<mm>-<dd>';
            $dictionary_revision_date = undef;
        }
    }

    for (@block_warnings) {
        print "$0: $filename $block_header: $_.\n";
    }

    for my $save_frame ( @{$save_frames} ) {
        my @warnings;

        if ( $check_references_in_descriptions ) {
            push @warnings, @{ check_references_in_descriptions(
                                    $save_frame,
                                    $dic_struct
                               ) };
        }

        push @warnings, @{check_category_ids($save_frame, $data)};
        if ( lc get_definition_scope($save_frame) eq 'item' ) {
            push @warnings, @{ check_aliases($save_frame) };
            push @warnings, @{ check_enumeration_range(
                                $save_frame,
                                $dic_struct,
                                { 'report_redundant_range_limits' =>
                                    $report_redundant_range_limits }
                            ) };
            push @warnings, @{ check_enumeration_default( $save_frame ) };
            push @warnings, @{ check_attribute_enumeration_def_index_id(
                                    $save_frame,
                                    $dic_struct
                            ) };
            push @warnings, @{ check_measurement_units(
                                    $save_frame,
                                    $dic_struct
                            ) };
            push @warnings, @{ check_su_naming_convention($save_frame) };
            push @warnings, @{ check_type_dimension_applicablity($save_frame) };
            push @warnings, @{ check_measurand_su_unit_compatibility(
                                    $save_frame,
                                    $data
                            ) };
        };
        push @warnings, @{ check_item_purpose($save_frame) };
        push @warnings, @{ check_linked_items($save_frame, $dic_struct) };
        push @warnings, @{ check_linked_item_compatibility(
                                $save_frame,
                                $data
                        ) };
        push @warnings, @{ check_attribute_definition_replaced_by(
                                $save_frame,
                                $dic_struct
                        ) };
        push @warnings, @{ check_category_key_name(
                                $save_frame,
                                $dic_struct
                        ) };
        push @warnings, @{ check_attribute_indices_referenced_id(
                                $save_frame,
                                $dic_struct
                        ) };
        push @warnings, @{ check_attribute_type_contents_referenced_id(
                            $save_frame,
                            $dic_struct
                        ) };
        push @warnings, @{ check_definition_class( $save_frame ) };
        if (defined $dictionary_revision_date) {
            push @warnings,
                 @{ check_update_date($save_frame, $dictionary_revision_date) };
        }
        my $frame_header = "save_$save_frame->{'name'}";
        foreach ( @warnings ) {
            print "$0: $filename $block_header $frame_header: WARNING, $_.\n";
        }
    }
}
