Skip to content

Utility functions


Convert chars to ranks

  1. void ivs::convert_char_to_rank<Alphabet>(std::span<char const> in, std::span<uint8_t> out)
  2. auto ivs::convert_char_to_rank<Alphabet>(std::span<char const> in) -> std::vector<uint8_t>
  3. auto ivs::view_char_to_rank<Alphabet> = /*unspecified*/

Conversion from char space to rank space. Versions 1 and 3 will never throw. Version 2 might throw inside of std::vector. Characters that are invalid for Alphabet will be converted to 255 and can be checked via verify_rank.

Example

// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>

// Example: converts the dna4 string "ACCACGT" into ranks with each of the
// three documented variants and prints the ranks of every variant on its
// own line.
int main()
{
    std::string input{"ACCACGT"};

    // Writes each rank as a decimal number, then terminates the line.
    auto const print_ranks = [](std::vector<uint8_t> const& ranks) {
        for (uint8_t const rank : ranks) {
            std::cout << static_cast<int>(rank);
        }
        std::cout << '\n';
    };

    { // Version 1: the caller provides an output buffer of matching size
        std::vector<uint8_t> output(input.size());
        ivs::convert_char_to_rank<ivs::dna4>(input, output);
        print_ranks(output);
    }
    { // Version 2: the result is returned as a freshly created vector
        std::vector<uint8_t> output = ivs::convert_char_to_rank<ivs::dna4>(input);
        print_ranks(output);
    }
    { // Version 3: view over the input, copied into a vector for printing
        auto rank_view = input | ivs::view_char_to_rank<ivs::dna4>;
        std::vector<uint8_t> output(rank_view.begin(), rank_view.end());
        print_ranks(output);
    }
}
Output:
0110123
0110123
0110123


Convert ranks to chars

  1. void ivs::convert_rank_to_char<Alphabet>(std::span<uint8_t const> in, std::span<char> out)
  2. auto ivs::convert_rank_to_char<Alphabet>(std::span<uint8_t const> in) -> std::string
  3. auto ivs::view_rank_to_char<Alphabet> = /*unspecified*/

Conversion from rank space to char space. Versions 1 and 3 will never throw. Version 2 might throw inside of std::string. Ranks that are invalid for Alphabet will be converted to \0 and can be checked via verify_char.

Example

// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>

// Example: converts a sequence of dna4 ranks back into characters with each
// of the three documented variants and prints one line per variant.
int main()
{
    // Ranks spelling the dna4 text:  G  G  C  T  A
    std::vector<uint8_t> input{2, 2, 1, 3, 0};

    { // Version 1: the caller provides an output buffer of matching size
        std::string output(input.size(), '\0');
        ivs::convert_rank_to_char<ivs::dna4>(input, output);
        std::cout << output << '\n';
    }
    { // Version 2: the result is returned as a freshly created string
        std::string output = ivs::convert_rank_to_char<ivs::dna4>(input);
        std::cout << output << '\n';
    }
    { // Version 3: view over the input, copied into a string for printing
        auto char_view = input | ivs::view_rank_to_char<ivs::dna4>;
        std::string output(char_view.begin(), char_view.end());
        std::cout << output << '\n';
    }
}
Output:
GGCTA
GGCTA
GGCTA


Normalize chars

  1. void ivs::normalize_char<Alphabet>(std::span<char const> in, std::span<char> out)
  2. auto ivs::normalize_char<Alphabet>(std::span<char const> in) -> std::string
  3. auto ivs::view_normalize_char<Alphabet> = /*unspecified*/

Normalizes a string according to Alphabet. For example, using dna4 and providing the string AaCcGgTt will convert it to AACCGGTT. Additionally, some alphabets have special rules: for dna4 the letter U will also be normalized to T. Versions 1 and 3 will never throw. Version 2 might throw inside of std::string. Characters that are invalid for Alphabet will be converted to \0 and can be checked via verify_char.

Example

// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>

// Example: normalizes a mixed-case string (including U/u) according to dna4
// with each of the three documented variants and prints one line per variant.
int main()
{
    std::string input{"AaCcGgTtUu"};

    { // Version 1: the caller provides an output buffer of matching size
        std::string output(input.size(), '\0');
        ivs::normalize_char<ivs::dna4>(input, output);
        std::cout << output << '\n';
    }
    { // Version 2: the result is returned as a freshly created string
        std::string output = ivs::normalize_char<ivs::dna4>(input);
        std::cout << output << '\n';
    }
    { // Version 3: view over the input, copied into a string for printing
        auto normalized_view = input | ivs::view_normalize_char<ivs::dna4>;
        std::string output(normalized_view.begin(), normalized_view.end());
        std::cout << output << '\n';
    }
}
Output:
AACCGGTTTT
AACCGGTTTT
AACCGGTTTT


Complement

  1. void ivs::complement_rank<Alphabet>(std::span<uint8_t const> in, std::span<uint8_t> out)
  2. auto ivs::complement_rank<Alphabet>(std::span<uint8_t const> in) -> std::vector<uint8_t>
  3. auto ivs::view_complement_rank<Alphabet> = /*unspecified*/
  4. void ivs::complement_char<Alphabet>(std::span<char const> in, std::span<char> out)
  5. auto ivs::complement_char<Alphabet>(std::span<char const> in) -> std::string
  6. auto ivs::view_complement_char<Alphabet> = /*unspecified*/

Computes the complement according to Alphabet. Alphabet is required to satisfy the concept alphabet_with_complement_c. Versions 1, 3, 4 and 6 never throw. Versions 2 and 5 might throw inside of std::vector or std::string. Invalid ranks in the *_rank functions will be converted to 0. In the *_char versions, invalid letters will be converted to \0.

Example

// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>

// Example: computes the dna4 complement in rank space (versions 1-3) and in
// char space (versions 4-6), printing one line per variant.
int main()
{
    { // complement_rank
        //                                 A  C  G  T
        auto input = std::vector<uint8_t>{ 0, 1, 2, 3};
        { // Version 1: the caller provides an output buffer of matching size
            auto output = std::vector<uint8_t>{};
            output.resize(input.size());
            ivs::complement_rank<ivs::dna4>(input, output);
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }

        { // Version 2: the result is returned as a freshly created vector
            std::vector<uint8_t> output = ivs::complement_rank<ivs::dna4>(input);
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }
        { // Version 3: view over the input, copied into a vector for printing
            // Uses the view object (not the version-2 function) so that this
            // block really demonstrates variant 3.
            auto output_view = input | ivs::view_complement_rank<ivs::dna4>;
            auto output = std::vector<uint8_t>(output_view.begin(), output_view.end());
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }
    }

    { // complement_char
        auto input = std::string{"AaCcGgTtUu"};
        { // Version 4: the caller provides an output buffer of matching size
            auto output = std::string{};
            output.resize(input.size());
            ivs::complement_char<ivs::dna4>(input, output);
            std::cout << output << '\n';
        }
        { // Version 5: the result is returned as a freshly created string
            std::string output = ivs::complement_char<ivs::dna4>(input);
            std::cout << output << '\n';
        }
        { // Version 6: view over the input, copied into a string for printing
            auto output_view = input | ivs::view_complement_char<ivs::dna4>;
            auto output = std::string(output_view.begin(), output_view.end());
            std::cout << output << '\n';
        }
    }
}
Output:
3210
3210
3210
TTGGCCAAAA
TTGGCCAAAA
TTGGCCAAAA


Reverse complement

  1. void ivs::reverse_complement_rank<Alphabet>(std::span<uint8_t const> in, std::span<uint8_t> out)
  2. auto ivs::reverse_complement_rank<Alphabet>(std::span<uint8_t const> in) -> std::vector<uint8_t>
  3. auto ivs::view_reverse_complement_rank<Alphabet> = /*unspecified*/
  4. void ivs::reverse_complement_char<Alphabet>(std::span<char const> in, std::span<char> out)
  5. auto ivs::reverse_complement_char<Alphabet>(std::span<char const> in) -> std::string
  6. auto ivs::view_reverse_complement_char<Alphabet> = /*unspecified*/

Computes the reverse complement according to Alphabet. Alphabet is required to satisfy the concept alphabet_with_complement_c. Versions 1, 3, 4 and 6 never throw. Versions 2 and 5 might throw inside of std::vector or std::string. Invalid ranks in the *_rank functions will be converted to 0. In the *_char versions, invalid letters will be converted to \0.

Example

// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>

// Example: computes the dna4 reverse complement in rank space (versions 1-3)
// and in char space (versions 4-6), printing one line per variant.
int main()
{
    { // reverse_complement_rank
        //                                 A  C  G  T
        auto input = std::vector<uint8_t>{ 0, 1, 2, 3};
        { // Version 1: the caller provides an output buffer of matching size
            auto output = std::vector<uint8_t>{};
            output.resize(input.size());
            ivs::reverse_complement_rank<ivs::dna4>(input, output);
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }

        { // Version 2: the result is returned as a freshly created vector
            std::vector<uint8_t> output = ivs::reverse_complement_rank<ivs::dna4>(input);
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }
        { // Version 3: view over the input, copied into a vector for printing
            // Pipe syntax, matching how every other view example is written.
            auto output_view = input | ivs::view_reverse_complement_rank<ivs::dna4>;
            auto output = std::vector<uint8_t>(output_view.begin(), output_view.end());
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }
    }

    { // reverse_complement_char
        auto input = std::string{"AaCcGgTtUu"};
        { // Version 4: the caller provides an output buffer of matching size
            auto output = std::string{};
            output.resize(input.size());
            ivs::reverse_complement_char<ivs::dna4>(input, output);
            std::cout << output << '\n';
        }
        { // Version 5: the result is returned as a freshly created string
            std::string output = ivs::reverse_complement_char<ivs::dna4>(input);
            std::cout << output << '\n';
        }
        { // Version 6: view over the input, copied into a string for printing
            auto output_view = input | ivs::view_reverse_complement_char<ivs::dna4>;
            auto output = std::string(output_view.begin(), output_view.end());
            std::cout << output << '\n';
        }
    }
}
Output:
0123
0123
0123
AAAACCGGTT
AAAACCGGTT
AAAACCGGTT


Verification

  1. std::optional<size_t> verify_char(std::span<char const> in)
  2. std::optional<size_t> verify_rank(std::span<uint8_t const> in)

Verifies whether characters and ranks are valid. Invalid ranks are 255 and invalid characters are \0. If an invalid char/rank is found, the function reports its position. These functions never throw.

Example

// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>

// NOTE(review): this example is a copy of the reverse-complement example and
// never calls verify_char / verify_rank, although it is listed under
// "Verification". It presumably should be replaced by a real verification
// example (together with its expected output) -- TODO confirm with the
// library authors.
//
// As written: computes the dna4 reverse complement in rank space
// (versions 1-3) and in char space (versions 4-6), one line per variant.
int main()
{
    { // reverse_complement_rank
        //                                 A  C  G  T
        auto input = std::vector<uint8_t>{ 0, 1, 2, 3};
        { // Version 1: the caller provides an output buffer of matching size
            auto output = std::vector<uint8_t>{};
            output.resize(input.size());
            ivs::reverse_complement_rank<ivs::dna4>(input, output);
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }

        { // Version 2: the result is returned as a freshly created vector
            std::vector<uint8_t> output = ivs::reverse_complement_rank<ivs::dna4>(input);
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }
        { // Version 3: view over the input, copied into a vector for printing
            // Pipe syntax, matching how every other view example is written.
            auto output_view = input | ivs::view_reverse_complement_rank<ivs::dna4>;
            auto output = std::vector<uint8_t>(output_view.begin(), output_view.end());
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }
    }

    { // reverse_complement_char
        auto input = std::string{"AaCcGgTtUu"};
        { // Version 4: the caller provides an output buffer of matching size
            auto output = std::string{};
            output.resize(input.size());
            ivs::reverse_complement_char<ivs::dna4>(input, output);
            std::cout << output << '\n';
        }
        { // Version 5: the result is returned as a freshly created string
            std::string output = ivs::reverse_complement_char<ivs::dna4>(input);
            std::cout << output << '\n';
        }
        { // Version 6: view over the input, copied into a string for printing
            auto output_view = input | ivs::view_reverse_complement_char<ivs::dna4>;
            auto output = std::string(output_view.begin(), output_view.end());
            std::cout << output << '\n';
        }
    }
}
Output:
0123
0123
0123
AAAACCGGTT
AAAACCGGTT
AAAACCGGTT