Utility functions¶
Convert chars to ranks¶
void ivs::convert_char_to_rank<Alphabet>(std::span<char const> in, std::span<uint8_t> out)
auto ivs::convert_char_to_rank<Alphabet>(std::span<char const> in) -> std::vector<uint8_t>
auto ivs::view_char_to_rank<Alphabet> = /*unspecified*/
Conversion from char space to rank space. Version 1 and 3 will never throw. Version 2 might throw inside of std::vector
.
Characters invalid to Alphabet
will be converted to 255
and can be checked via verify_rank
.
Example¶
// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>
int main()
{
    // Prints each rank as a decimal number on one line, then ends the line.
    auto const print_ranks = [](std::vector<uint8_t> const& ranks) {
        for (uint8_t const rank : ranks) {
            std::cout << static_cast<int>(rank);
        }
        std::cout << '\n';
    };

    // Character sequence that each version below converts into dna4 ranks.
    std::string sequence{"ACCACGT"};

    { // Version 1: the caller provides an output buffer of matching size
        std::vector<uint8_t> ranks(sequence.size());
        ivs::convert_char_to_rank<ivs::dna4>(sequence, ranks);
        print_ranks(ranks);
    }
    { // Version 2: the result is returned as a new vector
        std::vector<uint8_t> const ranks = ivs::convert_char_to_rank<ivs::dna4>(sequence);
        print_ranks(ranks);
    }
    { // Version 3: view adaptor; copied into a vector here for printing
        auto rank_view = sequence | ivs::view_char_to_rank<ivs::dna4>;
        std::vector<uint8_t> const ranks(rank_view.begin(), rank_view.end());
        print_ranks(ranks);
    }
}
0110123
0110123
0110123
Convert ranks to chars¶
void ivs::convert_rank_to_char<Alphabet>(std::span<uint8_t const> in, std::span<char> out)
auto ivs::convert_rank_to_char<Alphabet>(std::span<uint8_t const> in) -> std::string
auto ivs::view_rank_to_char<Alphabet> = /*unspecified*/
Conversion from rank space to char space.
Version 1 and 3 will never throw. Version 2 might throw inside of std::string
.
Ranks invalid to Alphabet
will be converted to \0
and can be checked via verify_char
.
Example¶
// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>
int main()
{
    // Writes one converted sequence per line.
    auto const print_line = [](std::string const& line) {
        std::cout << line << '\n';
    };

    // dna4 ranks of the letters G, G, C, T, A (in this order).
    std::vector<uint8_t> ranks{2, 2, 1, 3, 0};

    { // Version 1: the caller provides an output buffer of matching size
        std::string text(ranks.size(), '\0');
        ivs::convert_rank_to_char<ivs::dna4>(ranks, text);
        print_line(text);
    }
    { // Version 2: the result is returned as a new string
        std::string const text = ivs::convert_rank_to_char<ivs::dna4>(ranks);
        print_line(text);
    }
    { // Version 3: view adaptor; copied into a string here for printing
        auto char_view = ranks | ivs::view_rank_to_char<ivs::dna4>;
        std::string const text(char_view.begin(), char_view.end());
        print_line(text);
    }
}
GGCTA
GGCTA
GGCTA
Normalize chars¶
void ivs::normalize_char<Alphabet>(std::span<char const> in, std::span<char> out)
auto ivs::normalize_char<Alphabet>(std::span<char const> in) -> std::string
auto ivs::view_normalize_char<Alphabet> = /*unspecified*/
Normalizes string according to Alphabet
. For example using dna4
and providing the string AaCcGgTt
will convert it
to AACCGGTT
. Additionally some alphabets have some special rules. For dna4
the letter U
will also be normalized to T
.
Version 1 and 3 will never throw. Version 2 might throw.
Characters invalid to Alphabet
will be converted to \0
and can be checked via verify_char
.
Example¶
// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>
int main()
{
    // Writes one normalized sequence per line.
    auto const print_line = [](std::string const& line) {
        std::cout << line << '\n';
    };

    // Mixed-case input that also contains 'U'/'u' to show the dna4 special rule.
    std::string raw{"AaCcGgTtUu"};

    { // Version 1: the caller provides an output buffer of matching size
        std::string normalized(raw.size(), '\0');
        ivs::normalize_char<ivs::dna4>(raw, normalized);
        print_line(normalized);
    }
    { // Version 2: the result is returned as a new string
        std::string const normalized = ivs::normalize_char<ivs::dna4>(raw);
        print_line(normalized);
    }
    { // Version 3: view adaptor; copied into a string here for printing
        auto normalized_view = raw | ivs::view_normalize_char<ivs::dna4>;
        std::string const normalized(normalized_view.begin(), normalized_view.end());
        print_line(normalized);
    }
}
AACCGGTTTT
AACCGGTTTT
AACCGGTTTT
Complement¶
void ivs::complement_rank<Alphabet>(std::span<uint8_t const> in, std::span<uint8_t> out)
auto ivs::complement_rank<Alphabet>(std::span<uint8_t const> in) -> std::vector<uint8_t>
auto ivs::view_complement_rank<Alphabet> = /*unspecified*/
void ivs::complement_char<Alphabet>(std::span<char const> in, std::span<char> out)
auto ivs::complement_char<Alphabet>(std::span<char const> in) -> std::string
auto ivs::view_complement_char<Alphabet> = /*unspecified*/
Computes the complement according to Alphabet
. It is required that Alphabet
has the concept alphabet_with_complement_c
.
Version 1, 3, 4 and 6 never throw. Version 2 and 5 might throw inside of std::vector
or std::string
.
Invalid ranks in *_rank
functions will be converted to 0
.
In the *_char
version, invalid letters will be converted to \0
.
Example¶
// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>
// Demonstrates all six complement variants for ivs::dna4:
// versions 1-3 operate on ranks, versions 4-6 operate on characters.
int main()
{
    { // complement_rank
        // ranks of the letters A, C, G, T (in this order)
        auto input = std::vector<uint8_t>{0, 1, 2, 3};
        { // Version 1: the caller provides an output buffer of matching size
            auto output = std::vector<uint8_t>{};
            output.resize(input.size());
            ivs::complement_rank<ivs::dna4>(input, output);
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }
        { // Version 2: the result is returned as a new vector
            std::vector<uint8_t> output = ivs::complement_rank<ivs::dna4>(input);
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }
        { // Version 3: view adaptor; copied into a vector here for printing
            // Must use view_complement_rank: calling complement_rank(input)
            // would silently demonstrate Version 2 a second time.
            auto output_view = ivs::view_complement_rank<ivs::dna4>(input);
            auto output = std::vector<uint8_t>(output_view.begin(), output_view.end());
            for (auto r : output) {
                std::cout << static_cast<int>(r);
            }
            std::cout << '\n';
        }
    }
    { // complement_char
        auto input = std::string{"AaCcGgTtUu"};
        { // Version 4: the caller provides an output buffer of matching size
            auto output = std::string{};
            output.resize(input.size());
            ivs::complement_char<ivs::dna4>(input, output);
            std::cout << output << '\n';
        }
        { // Version 5: the result is returned as a new string
            std::string output = ivs::complement_char<ivs::dna4>(input);
            std::cout << output << '\n';
        }
        { // Version 6: view adaptor; copied into a string here for printing
            auto output_view = input | ivs::view_complement_char<ivs::dna4>;
            auto output = std::string(output_view.begin(), output_view.end());
            std::cout << output << '\n';
        }
    }
}
3210
3210
3210
TTGGCCAAAA
TTGGCCAAAA
TTGGCCAAAA
Reverse complement¶
void ivs::reverse_complement_rank<Alphabet>(std::span<uint8_t const> in, std::span<uint8_t> out)
auto ivs::reverse_complement_rank<Alphabet>(std::span<uint8_t const> in) -> std::vector<uint8_t>
auto ivs::view_reverse_complement_rank<Alphabet> = /*unspecified*/
void ivs::reverse_complement_char<Alphabet>(std::span<char const> in, std::span<char> out)
auto ivs::reverse_complement_char<Alphabet>(std::span<char const> in) -> std::string
auto ivs::view_reverse_complement_char<Alphabet> = /*unspecified*/
Computes the reverse complement according to Alphabet
. It is required that Alphabet
has the concept alphabet_with_complement_c
.
Version 1, 3, 4 and 6 never throw. Version 2 and 5 might throw inside of std::vector
or std::string
.
Invalid ranks in *_rank
functions will be converted to 0
.
In the *_char
version, invalid letters will be converted to \0
.
Example¶
// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>
int main()
{
    // Prints each rank as a decimal number on one line, then ends the line.
    auto const print_ranks = [](std::vector<uint8_t> const& ranks) {
        for (uint8_t const rank : ranks) {
            std::cout << static_cast<int>(rank);
        }
        std::cout << '\n';
    };
    // Writes one character sequence per line.
    auto const print_line = [](std::string const& line) {
        std::cout << line << '\n';
    };

    { // reverse_complement_rank
        // ranks of the letters A, C, G, T (in this order)
        std::vector<uint8_t> ranks{0, 1, 2, 3};

        { // Version 1: the caller provides an output buffer of matching size
            std::vector<uint8_t> result(ranks.size());
            ivs::reverse_complement_rank<ivs::dna4>(ranks, result);
            print_ranks(result);
        }
        { // Version 2: the result is returned as a new vector
            std::vector<uint8_t> const result = ivs::reverse_complement_rank<ivs::dna4>(ranks);
            print_ranks(result);
        }
        { // Version 3: view adaptor; copied into a vector here for printing
            auto result_view = ivs::view_reverse_complement_rank<ivs::dna4>(ranks);
            std::vector<uint8_t> const result(result_view.begin(), result_view.end());
            print_ranks(result);
        }
    }
    { // reverse_complement_char
        std::string sequence{"AaCcGgTtUu"};

        { // Version 4: the caller provides an output buffer of matching size
            std::string result(sequence.size(), '\0');
            ivs::reverse_complement_char<ivs::dna4>(sequence, result);
            print_line(result);
        }
        { // Version 5: the result is returned as a new string
            std::string const result = ivs::reverse_complement_char<ivs::dna4>(sequence);
            print_line(result);
        }
        { // Version 6: view adaptor; copied into a string here for printing
            auto result_view = sequence | ivs::view_reverse_complement_char<ivs::dna4>;
            std::string const result(result_view.begin(), result_view.end());
            print_line(result);
        }
    }
}
0123
0123
0123
AAAACCGGTT
AAAACCGGTT
AAAACCGGTT
Verification¶
std::optional<size_t> verify_char(std::span<char const> in)
std::optional<size_t> verify_rank(std::span<uint8_t const> in)
Verifies if characters and ranks are valid. Invalid ranks are 255
and invalid characters \0
. If an invalid
char/rank is found, this function reports its position. These functions never throw.
Example¶
// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <ivsigma/ivsigma.h>
#include <iostream>
// NOTE(review): this snippet is line-for-line the "Reverse complement" example and never
// calls verify_char or verify_rank -- presumably the wrong snippet was included in the
// "Verification" section. TODO: replace it with an example that exercises verification.
//
// As written, it computes the dna4 reverse complement of a rank sequence (versions 1-3)
// and of a character sequence (versions 4-6) and prints each result on its own line.
int main()
{
{ // reverse_complement_rank
// A C G T
auto input = std::vector<uint8_t>{ 0, 1, 2, 3};
{ // Version 1
// Output buffer is sized to the input before the call.
auto output = std::vector<uint8_t>{};
output.resize(input.size());
ivs::reverse_complement_rank<ivs::dna4>(input, output);
// Cast to int so each rank is printed as a number, not as a raw character.
for (auto r : output) {
std::cout << (int)r;
}
std::cout << '\n';
}
{ // Version 2
// Result is returned by value.
std::vector<uint8_t> output = ivs::reverse_complement_rank<ivs::dna4>(input);
for (auto r : output) {
std::cout << (int)r;
}
std::cout << '\n';
}
{ // Version 3
// The view is copied into a vector before printing.
auto output_view = ivs::view_reverse_complement_rank<ivs::dna4>(input);
auto output = std::vector<uint8_t>(output_view.begin(), output_view.end());
for (auto r : output) {
std::cout << (int)r;
}
std::cout << '\n';
}
}
{ // reverse_complement_char
auto input = std::string{"AaCcGgTtUu"};
{ // Version 4
// Output buffer is sized to the input before the call.
auto output = std::string{};
output.resize(input.size());
ivs::reverse_complement_char<ivs::dna4>(input, output);
std::cout << output << '\n';
}
{ // Version 5
// Result is returned by value.
std::string output = ivs::reverse_complement_char<ivs::dna4>(input);
std::cout << output << '\n';
}
{ // Version 6
// The view is copied into a string before printing.
auto output_view = input | ivs::view_reverse_complement_char<ivs::dna4>;
auto output = std::string(output_view.begin(), output_view.end());
std::cout << output << '\n';
}
}
}
0123
0123
0123
AAAACCGGTT
AAAACCGGTT
AAAACCGGTT