Bifrost
UnitigMap.hpp
Go to the documentation of this file.
1 #ifndef BFG_UNITIGMAP_HPP
2 #define BFG_UNITIGMAP_HPP
3 
#include <cstddef>
#include <cstdint>
#include <string>
#include <type_traits>

#include "Common.hpp"
#include "Kmer.hpp"
7 
// Forward declarations of the class templates referenced by UnitigMap below.
// They are only named here (friend declarations, typedefs, return types);
// their definitions live in other headers.
template<typename U> class Unitig;                                        // Unitig: a vertex of the Compacted de Bruijn graph
template<typename U, typename G> class CompactedDBG;                      // The Compacted de Bruijn graph itself
template<typename U, typename G, bool is_const> class BackwardCDBG;      // Wrapper of neighborIterator used as UnitigMap::UnitigMap_BW
template<typename U, typename G, bool is_const> class ForwardCDBG;       // Wrapper of neighborIterator used as UnitigMap::UnitigMap_FW
template<typename U, typename G, bool is_const> class neighborIterator;  // Iterator over the neighbors of a reference unitig
18 
/** @struct UnitigMapBase
* @brief Structure containing the basic information of a unitig mapping.
*/
struct UnitigMapBase {

    /** UnitigMapBase constructor (isEmpty = true).
    * @param length presumably initializes the mapping length (member len);
    * the definition is not in this header -- confirm in UnitigMap.tcc.
    */
    UnitigMapBase(const size_t length = 1);

    /** UnitigMapBase constructor.
    * NOTE(review): the parameters presumably initialize dist, len, size and
    * strand respectively -- confirm against the definition in UnitigMap.tcc.
    * @param start is a start position on the reference unitig.
    * @param length is a mapping length.
    * @param unitig_sz is a reference unitig length.
    * @param strand is the strand of the mapping.
    */
    UnitigMapBase(const size_t start, const size_t length, const size_t unitig_sz, const bool strand);

    /** Equality operator: check if two UnitigMapBase are the same.
    * @param o is the UnitigMapBase to compare with.
    */
    bool operator==(const UnitigMapBase& o) const;

    /** Inequality operator: check if two UnitigMapBase are different.
    * @param o is the UnitigMapBase to compare with.
    */
    bool operator!=(const UnitigMapBase& o) const;

    size_t dist; ///< Start position of the mapping (0-based distance) from the start of the reference unitig.
    size_t len;  ///< Length of the mapping on the reference unitig, in k-mers.
    size_t size; ///< Length of the reference unitig.

    bool strand;  ///< True if the mapped k-mer or sequence matches the forward strand, false if it matches its reverse-complement.
    bool isEmpty; ///< True if there is no mapping.
};
67 
91 template<typename Unitig_data_t = void, typename Graph_data_t = void, bool is_const = false>
92 class UnitigMap : public UnitigMapBase {
93 
94  typedef Unitig_data_t U;
95  typedef Graph_data_t G;
96 
97  template<typename U, typename G> friend class CompactedDBG;
98  template<typename U, typename G, bool C> friend class BackwardCDBG;
99  template<typename U, typename G, bool C> friend class ForwardCDBG;
100  template<typename U, typename G, bool C> friend class unitigIterator;
101  template<typename U, typename G, bool C> friend class UnitigMap;
102 
103  typedef typename std::conditional<is_const, const CompactedDBG<U, G>*, CompactedDBG<U, G>*>::type CompactedDBG_ptr_t;
104  typedef typename std::conditional<is_const, const U*, U*>::type Unitig_data_ptr_t;
105 
106  public:
107 
108  typedef BackwardCDBG<U, G, is_const> UnitigMap_BW;
109  typedef ForwardCDBG<U, G, is_const> UnitigMap_FW;
110 
116  UnitigMap(size_t length = 1, CompactedDBG_ptr_t cdbg_ = nullptr);
117 
126  UnitigMap(const size_t start, const size_t length, const size_t unitig_sz, const bool strand);
127 
131  bool operator==(const UnitigMap& o) const;
132 
136  bool operator!=(const UnitigMap& o) const;
137 
142  string mappedSequenceToString() const;
143 
148  string referenceUnitigToString() const;
149 
162  size_t lcp(const char* s, const size_t pos_s = 0, const size_t pos_um_seq = 0, const bool um_reversed = false) const;
163 
168  Kmer getUnitigHead() const;
169 
174  Kmer getUnitigTail() const;
175 
182  Kmer getUnitigKmer(const size_t pos) const;
183 
188  Kmer getMappedHead() const;
189 
194  Kmer getMappedTail() const;
195 
202  Kmer getMappedKmer(const size_t pos) const;
203 
208  UnitigMap<U, G, is_const> getKmerMapping(const size_t pos) const;
209 
217  Unitig_data_ptr_t getData() const;
218 
224  UnitigMap_BW getPredecessors() const;
225 
231  UnitigMap_FW getSuccessors() const;
232 
238  inline CompactedDBG_ptr_t getGraph() const { return cdbg; }
239 
240  operator UnitigMap<U, G, true>() const {
241 
242  UnitigMap<U, G, true> um(pos_unitig, dist, len, size, isShort, isAbundant, strand, cdbg);
243 
244  um.isEmpty = isEmpty;
245 
246  return um;
247  }
248 
249  void setFullCoverage() const;
250  void increaseCoverage() const;
251  void decreaseCoverage() const;
252 
253  bool isCoverageFull() const;
254  size_t getCoverage(const size_t pos) const;
255 
256  private:
257 
258  UnitigMap(size_t p_unitig, size_t i, size_t l, size_t sz, bool short_, bool abundance, bool strd, CompactedDBG_ptr_t cdbg_);
259 
260  neighborIterator<U, G, is_const> bw_begin() const;
261  neighborIterator<U, G, is_const> bw_end() const;
262 
263  neighborIterator<U, G, is_const> fw_begin() const;
264  neighborIterator<U, G, is_const> fw_end() const;
265 
266  template<bool is_void> typename std::enable_if<!is_void, Unitig<U>>::type splitData_(const bool last_split) const;
267  template<bool is_void> typename std::enable_if<is_void, Unitig<U>>::type splitData_(const bool last_split) const;
268 
269  Unitig<U> splitData(const bool last_split) const;
270 
271  template<bool is_void> typename std::enable_if<!is_void, Unitig_data_ptr_t>::type getData_() const;
272  template<bool is_void> typename std::enable_if<is_void, Unitig_data_ptr_t>::type getData_() const;
273 
274  void partialCopy(const UnitigMap<U, G, is_const>& um);
275 
276  size_t pos_unitig; // unitig pos. in v_unitigs or v_kmers or h_kmers
277 
278  bool isShort; // true if the unitig has length k
279  bool isAbundant; // true if the unitig has length k and has an abundant minimizer
280 
281  CompactedDBG_ptr_t cdbg;
282 };
283 
284 template<typename Unitig_data_t = void, typename Graph_data_t = void, bool is_const = false>
286 
287  typedef Unitig_data_t U;
288  typedef Graph_data_t G;
289 
290  size_t operator()(const UnitigMap<U, G, is_const>& um) const {
291 
292  struct UnitigMapTMP {
293 
294  size_t pos_unitig; // unitig pos. in v_unitigs or v_kmers or h_kmers
295  size_t dist;
296  size_t len;
297  size_t size;
298 
299  bool strand;
300  bool isEmpty;
301 
302  bool isShort; // true if the unitig has length k
303  bool isAbundant; // true if the unitig has length k and has an abundant minimizer
304 
305  const void* cdbg;
306 
307  UnitigMapTMP(const UnitigMap<U, G, is_const>& um) : pos_unitig(um.pos_unitig), dist(um.dist), len(um.len), size(um.size),
308  strand(um.strand), isEmpty(um.isEmpty), isShort(um.isShort),
309  isAbundant(um.isAbundant), cdbg(static_cast<const void*>(um.cdbg)) {};
310  };
311 
312  UnitigMapTMP tmp(um);
313 
314  return static_cast<size_t>(XXH64(static_cast<const void*>(&tmp), sizeof(UnitigMapTMP), 0));
315  }
316 };
317 
318 #include "UnitigMap.tcc"
319 
320 #endif
string mappedSequenceToString() const
Create a string containing the sequence corresponding to the mapping.
Iterator for the neighbors (predecessors or successors) of a reference unitig used in a UnitigMap object.
Definition: NeighborIterator.hpp:34
Iterator for the unitigs of a Compacted de Bruijn graph.
Definition: UnitigIterator.hpp:36
size_t len
Length of the mapping on the reference unitig, in k-mers.
Definition: UnitigMap.hpp:61
Kmer getUnitigKmer(const size_t pos) const
Get the k-mer starting at position pos in the reference unitig used for the mapping.
Unitig_data_ptr_t getData() const
Get a pointer to the data associated with the reference unitig used in the mapping.
bool operator!=(const UnitigMapBase &o) const
Inequality operator: check if two UnitigMapBase are different.
UnitigMap_FW getSuccessors() const
Create a UnitigMap_FW object that can create iterators (through UnitigMap_FW::begin() and UnitigMap_FW::end()) over the successors of the reference unitig used in the mapping.
Represent a Compacted de Bruijn graph.
Definition: CompactedDBG.hpp:297
UnitigMap_BW getPredecessors() const
Create a UnitigMap_BW object that can create iterators (through UnitigMap_BW::begin() and UnitigMap_BW::end()) over the predecessors of the reference unitig used in the mapping.
Definition: UnitigMap.hpp:285
Kmer getMappedTail() const
Get the tail k-mer of the mapped sequence.
size_t size
Length of the reference unitig.
Definition: UnitigMap.hpp:62
size_t lcp(const char *s, const size_t pos_s=0, const size_t pos_um_seq=0, const bool um_reversed=false) const
Compute the length of the longest common prefix between a given sequence and the reference unitig used in the mapping.
CompactedDBG_ptr_t getGraph() const
Get a pointer to the CompactedDBG containing the reference unitig used in the mapping.
Definition: UnitigMap.hpp:238
Interface to store and manipulate k-mers.
Definition: Kmer.hpp:40
Contain all the information for the mapping of a k-mer or a sequence to a unitig of a Compacted de Bruijn graph.
Definition: NeighborIterator.hpp:12
Wrapper for class neighborIterator to iterate over the predecessors of a reference unitig used in a UnitigMap object.
Definition: NeighborIterator.hpp:116
Interface for the class Kmer:
Kmer getUnitigTail() const
Get the tail k-mer of the reference unitig used for the mapping.
bool strand
True if the mapped k-mer or sequence matches the forward strand, false if it matches its reverse-complement.
Definition: UnitigMap.hpp:64
UnitigMap< U, G, is_const > getKmerMapping(const size_t pos) const
Create a new UnitigMap object which is the mapping of a k-mer on a reference unitig.
Kmer getUnitigHead() const
Get the head k-mer of the reference unitig used for the mapping.
Kmer getMappedHead() const
Get the head k-mer of the mapped sequence.
size_t dist
Start position of the mapping (0-based distance) from the start of the reference unitig.
Definition: UnitigMap.hpp:60
Kmer getMappedKmer(const size_t pos) const
Get the k-mer starting at position pos in the mapped sequence.
UnitigMapBase(const size_t length=1)
UnitigMapBase constructor (isEmpty = true).
Wrapper for class neighborIterator to iterate over the successors of a reference unitig used in a UnitigMap object.
Definition: NeighborIterator.hpp:162
Structure containing the basic information of a unitig mapping.
Definition: UnitigMap.hpp:33
Represent a unitig which is a vertex of the Compacted de Bruijn graph.
Definition: Unitig.hpp:22
string referenceUnitigToString() const
Create a string containing the sequence of the reference unitig used in the mapping.
bool isEmpty
True if there is no mapping.
Definition: UnitigMap.hpp:65
bool operator==(const UnitigMapBase &o) const
Equality operator: check if two UnitigMapBase are the same.