ProteoWizard
SpectrumListCacheTest.cpp
Go to the documentation of this file.
1//
2// $Id$
3//
4//
5// Original author: Matt Chambers <matt.chambers <a.t> vanderbilt.edu>
6//
7// Copyright 2008 Vanderbilt University - Nashville, TN 37232
8//
9// Licensed under the Apache License, Version 2.0 (the "License");
10// you may not use this file except in compliance with the License.
11// You may obtain a copy of the License at
12//
13// http://www.apache.org/licenses/LICENSE-2.0
14//
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
20//
21
22
24#include "MSDataFile.hpp"
25#include "MemoryMRUCache.hpp"
26#include "SpectrumListCache.hpp"
28#include "Serializer_MGF.hpp"
29
30
31using namespace pwiz::util;
32using namespace pwiz::cv;
33using namespace pwiz::msdata;
34//using namespace pwiz::analysis;
35
36
37ostream* os_ = 0;
38
39
40namespace std {
41
42ostream& operator<< (ostream& os, SpectrumListCache::CacheType& cache)
43{
44 os << "Spectrum cache indices (from MRU to LRU):";
45 for (SpectrumListCache::CacheType::iterator itr = cache.begin(); itr != cache.end(); ++itr)
46 os << " " << itr->spectrum->index;
47 return os;
48}
49
50} // namespace std
51
52
84
85
86SpectrumPtr makeSpectrumPtr(size_t index, const string& id)
87{
88 SpectrumPtr spectrum(new Spectrum);
89 spectrum->id = id;
90 spectrum->index = index;
91 spectrum->set(MS_MSn_spectrum);
92 spectrum->set(MS_ms_level, 2);
93 spectrum->precursors.push_back(Precursor(123.4));
94 spectrum->setMZIntensityArrays(vector<double>(), vector<double>(), MS_number_of_detector_counts);
95 BinaryDataArray& mzArray = *spectrum->getMZArray();
96 BinaryDataArray& intensityArray = *spectrum->getIntensityArray();
97 for (size_t i=0; i < (index+1)*10; ++i)
98 {
99 mzArray.data.push_back(i);
100 intensityArray.data.push_back(i*100);
101 }
102 spectrum->defaultArrayLength = mzArray.data.size();
103 return spectrum;
104}
105
107{
108 return s.dataProcessingPtr.get() ||
109 s.sourceFilePtr.get() ||
110 !s.scanList.empty() ||
111 !s.precursors.empty() ||
112 !s.paramGroupPtrs.empty() ||
113 !s.cvParams.empty() ||
114 !s.userParams.empty();
115}
116
118{
119 return s.hasBinaryData();
120}
121
// Cache-off scenario: builds a four-spectrum in-memory list, requests each
// spectrum through 'slc' (alternating without/with binary data) and asserts
// that the cache is empty both before and after the accesses.
// NOTE(review): the signature line is not present in this listing; presumably
// this is the body of "void testModeOff()" named in the symbol index - confirm.
123{
124 // initialize list
125 shared_ptr<SpectrumListSimple> sl(new SpectrumListSimple);
126 sl->spectra.push_back(makeSpectrumPtr(0, "S1"));
127 sl->spectra.push_back(makeSpectrumPtr(1, "S2"));
128 sl->spectra.push_back(makeSpectrumPtr(2, "S3"));
129 sl->spectra.push_back(makeSpectrumPtr(3, "S4"));
130
131 // access a series of spectra and make sure the cache behaves appropriately:
132 // in off mode, the cache should always be empty
133
134 SpectrumPtr s;
135
// NOTE(review): 'slc' and 'cache' are used below but their declarations are
// not present in this listing (the numbering jumps from 135 to 138).
138
139 unit_assert(cache.empty());
140
141 s = slc.spectrum(0, false);
142 s = slc.spectrum(1, true);
143 s = slc.spectrum(2, false);
144 s = slc.spectrum(3, true);
145
// dump the cache contents only when verbose output was requested
146 if (os_) *os_ << cache << endl;
147 unit_assert(cache.empty());
148}
149
150
// Metadata-only scenario: round-trips a four-spectrum list through MGF, then
// accesses spectra through 'slc' and checks after each access which spectrum
// is most/least recently used and that cached entries have metadata but no
// binary data.
// NOTE(review): the signature line is not present in this listing; presumably
// this is the body of "void testModeMetaDataOnly()" named in the symbol index
// - confirm.
152{
153 // initialize list
154 MSData msd;
155 shared_ptr<SpectrumListSimple> sl(new SpectrumListSimple);
156 sl->spectra.push_back(makeSpectrumPtr(0, "S1"));
157 sl->spectra.push_back(makeSpectrumPtr(1, "S2"));
158 sl->spectra.push_back(makeSpectrumPtr(2, "S3"));
159 sl->spectra.push_back(makeSpectrumPtr(3, "S4"));
160 msd.run.spectrumListPtr = sl;
161
162 // SpectrumListSimple returns the same shared_ptrs regardless of caching;
163 // serializing to MGF and back will produce different shared_ptrs
164 boost::shared_ptr<stringstream> ss(new stringstream);
165 Serializer_MGF serializer;
166 serializer.write(*ss, msd, 0);
167 serializer.read(ss, msd);
168
169 // access a series of spectra and make sure the cache behaves appropriately:
170 // in metadata-only mode, entries in the cache should:
171 // - always have metadata
172 // - never have binary data
173
174 SpectrumPtr s;
175
// NOTE(review): 'slc' and 'cache' are used below but their declarations are
// not present in this listing (the numbering jumps from 175 to 178); other
// gaps in the numbering further down suggest more dropped statements.
178
179 unit_assert(cache.empty());
181
182 s = slc.spectrum(0, false);
183
184 // pointers should be equal
185 unit_assert_operator_equal(slc.spectrum(0, false), s);
186
187 if (os_) *os_ << cache << endl;
188 unit_assert(!cache.empty());
190 unit_assert_operator_equal(0, cache.mru().spectrum->index);
191
192 // with-binary-data access should return the binary data, but only cache the metadata
193 s = slc.spectrum(1, true);
194
195 if (os_) *os_ << cache << endl;
197 unit_assert_operator_equal(1, cache.mru().spectrum->index);
198 unit_assert(spectrumHasMetadata(*cache.mru().spectrum));
199 unit_assert(!spectrumHasBinaryData(*cache.mru().spectrum));
200 unit_assert(spectrumHasMetadata(*cache.lru().spectrum));
201 unit_assert_operator_equal(0, cache.lru().spectrum->index);
202
203 s = slc.spectrum(2, false);
204
205 // pointers should be equal
206 unit_assert_operator_equal(slc.spectrum(2, false), s);
207
208 if (os_) *os_ << cache << endl;
210 unit_assert_operator_equal(2, cache.mru().spectrum->index);
211 unit_assert(spectrumHasMetadata(*cache.mru().spectrum));
212 unit_assert(!spectrumHasBinaryData(*cache.mru().spectrum));
213 unit_assert_operator_equal(1, cache.lru().spectrum->index);
214
215 s = slc.spectrum(3, true);
216
217 if (os_) *os_ << cache << endl;
219 unit_assert_operator_equal(3, cache.mru().spectrum->index);
220 unit_assert(spectrumHasMetadata(*cache.mru().spectrum));
221 unit_assert(!spectrumHasBinaryData(*cache.mru().spectrum));
222 unit_assert_operator_equal(2, cache.lru().spectrum->index);
223 unit_assert(spectrumHasMetadata(*cache.lru().spectrum));
224
// re-request a spectrum that the assertions above show is already cached
225 s = slc.spectrum(2, true);
226
227 if (os_) *os_ << cache << endl;
229 unit_assert_operator_equal(2, cache.mru().spectrum->index);
230 unit_assert(spectrumHasMetadata(*cache.mru().spectrum));
231 unit_assert(!spectrumHasBinaryData(*cache.mru().spectrum));
232 unit_assert_operator_equal(3, cache.lru().spectrum->index);
233 unit_assert(spectrumHasMetadata(*cache.lru().spectrum));
234}
235
236
// Binary-data-only scenario: round-trips a four-spectrum list through MGF,
// then accesses spectra through 'slc' and checks that metadata-only requests
// leave the cache unchanged while with-binary-data requests are cached with
// binary data and without metadata.
// NOTE(review): the signature line is not present in this listing; presumably
// this is the body of "void testModeBinaryDataOnly()" named in the symbol
// index - confirm.
238{
239 // initialize list
240 MSData msd;
241 shared_ptr<SpectrumListSimple> sl(new SpectrumListSimple);
242 sl->spectra.push_back(makeSpectrumPtr(0, "S1"));
243 sl->spectra.push_back(makeSpectrumPtr(1, "S2"));
244 sl->spectra.push_back(makeSpectrumPtr(2, "S3"));
245 sl->spectra.push_back(makeSpectrumPtr(3, "S4"));
246 msd.run.spectrumListPtr = sl;
247
248 // SpectrumListSimple returns the same shared_ptrs regardless of caching;
249 // serializing to MGF and back will produce different shared_ptrs
250 boost::shared_ptr<stringstream> ss(new stringstream);
251 Serializer_MGF serializer;
252 serializer.write(*ss, msd, 0);
253 serializer.read(ss, msd);
254
255 // access a series of spectra and make sure the cache behaves appropriately:
256 // in binary-data-only mode, entries in the cache should:
257 // - never have metadata
258 // - always have binary data
259
260 SpectrumPtr s;
261
// NOTE(review): 'slc' and 'cache' are used below but their declarations are
// not present in this listing (the numbering jumps from 261 to 264); other
// gaps in the numbering further down suggest more dropped statements.
264
265 unit_assert(cache.empty());
267
268 // metadata-only access should not affect the cache
269 s = slc.spectrum(0, false);
270
271 if (os_) *os_ << cache << endl;
272 unit_assert(cache.empty());
274
275 // with-binary-data access should be cached without the metadata
276 s = slc.spectrum(1, true);
277
278 if (os_) *os_ << cache << endl;
280 unit_assert_operator_equal(1, cache.mru().spectrum->index);
281 unit_assert(!spectrumHasMetadata(*cache.mru().spectrum));
282 unit_assert(spectrumHasBinaryData(*cache.mru().spectrum));
283
// another metadata-only access: the most recently used entry must still be spectrum 1
284 s = slc.spectrum(2, false);
285
286 if (os_) *os_ << cache << endl;
288 unit_assert_operator_equal(1, cache.mru().spectrum->index);
289 unit_assert(!spectrumHasMetadata(*cache.mru().spectrum));
290 unit_assert(spectrumHasBinaryData(*cache.mru().spectrum));
291
292 s = slc.spectrum(3, true);
293
294 if (os_) *os_ << cache << endl;
296 unit_assert_operator_equal(3, cache.mru().spectrum->index);
297 unit_assert(!spectrumHasMetadata(*cache.mru().spectrum));
298 unit_assert(spectrumHasBinaryData(*cache.mru().spectrum));
299 unit_assert_operator_equal(1, cache.lru().spectrum->index);
300 unit_assert(!spectrumHasMetadata(*cache.lru().spectrum));
301 unit_assert(spectrumHasBinaryData(*cache.lru().spectrum));
302
// re-request spectrum 1: it should become the most recently used entry again
303 s = slc.spectrum(1, true);
304
305 if (os_) *os_ << cache << endl;
307 unit_assert_operator_equal(1, cache.mru().spectrum->index);
308 unit_assert(!spectrumHasMetadata(*cache.mru().spectrum));
309 unit_assert(spectrumHasBinaryData(*cache.mru().spectrum));
310 unit_assert_operator_equal(3, cache.lru().spectrum->index);
311 unit_assert(!spectrumHasMetadata(*cache.lru().spectrum));
312 unit_assert(spectrumHasBinaryData(*cache.lru().spectrum));
313}
314
315
// Metadata-and-binary-data scenario: round-trips a four-spectrum list through
// MGF, wraps it in a SpectrumListCache, and checks that metadata-only requests
// leave the cache unchanged while with-binary-data requests are cached with
// both metadata and binary data.
// NOTE(review): the signature line is not present in this listing; presumably
// this is the body of "void testModeMetaDataAndBinaryData()" named in the
// symbol index - confirm.
317{
318 // initialize list
319 MSData msd;
320 shared_ptr<SpectrumListSimple> sl(new SpectrumListSimple);
321 sl->spectra.push_back(makeSpectrumPtr(0, "S1"));
322 sl->spectra.push_back(makeSpectrumPtr(1, "S2"));
323 sl->spectra.push_back(makeSpectrumPtr(2, "S3"));
324 sl->spectra.push_back(makeSpectrumPtr(3, "S4"));
325 msd.run.spectrumListPtr = sl;
326
327 // SpectrumListSimple returns the same shared_ptrs regardless of caching;
328 // serializing to MGF and back will produce different shared_ptrs
329 boost::shared_ptr<stringstream> ss(new stringstream);
330 Serializer_MGF serializer;
331 serializer.write(*ss, msd, 0);
332 serializer.read(ss, msd);
333
334 // access a series of spectra and make sure the cache behaves appropriately:
335 // in metadata-and-binary-data mode, entries in the cache should:
336 // - always have metadata
337 // - always have binary data
338
339 SpectrumPtr s;
340
341 SpectrumListCache slc(msd.run.spectrumListPtr, MemoryMRUCacheMode_MetaDataAndBinaryData, 2);
// NOTE(review): 'cache' is used below but its declaration is not present in
// this listing (the numbering jumps from 341 to 343); other gaps in the
// numbering further down suggest more dropped statements.
343
344 unit_assert(cache.empty());
346
347 // metadata-only access should not affect the cache
348 s = slc.spectrum(0, false);
349
350 if (os_) *os_ << cache << endl;
351 unit_assert(cache.empty());
353
354 s = slc.spectrum(1, true);
355
356 // pointers should be equal
357 unit_assert_operator_equal(slc.spectrum(1, true), s);
358
359 if (os_) *os_ << cache << endl;
361 unit_assert_operator_equal(1, cache.mru().spectrum->index);
362 unit_assert(spectrumHasMetadata(*cache.mru().spectrum));
363 unit_assert(spectrumHasBinaryData(*cache.mru().spectrum));
364
// another metadata-only access: the most recently used entry must still be spectrum 1
365 s = slc.spectrum(2, false);
366
367 if (os_) *os_ << cache << endl;
369 unit_assert_operator_equal(1, cache.mru().spectrum->index);
370 unit_assert(spectrumHasMetadata(*cache.mru().spectrum));
371 unit_assert(spectrumHasBinaryData(*cache.mru().spectrum));
372
373 s = slc.spectrum(3, true);
374
375 // pointers should be equal
376 unit_assert_operator_equal(slc.spectrum(3, true), s);
377
378 if (os_) *os_ << cache << endl;
380 unit_assert_operator_equal(3, cache.mru().spectrum->index);
381 unit_assert(spectrumHasMetadata(*cache.mru().spectrum));
382 unit_assert(spectrumHasBinaryData(*cache.mru().spectrum));
383 unit_assert_operator_equal(1, cache.lru().spectrum->index);
384 unit_assert(spectrumHasMetadata(*cache.lru().spectrum));
385 unit_assert(spectrumHasBinaryData(*cache.lru().spectrum));
386
// a third with-binary-data spectrum: afterwards 2 is most recent and 3 is least recent
387 s = slc.spectrum(2, true);
388
389 if (os_) *os_ << cache << endl;
391 unit_assert_operator_equal(2, cache.mru().spectrum->index);
392 unit_assert(spectrumHasMetadata(*cache.mru().spectrum));
393 unit_assert(spectrumHasBinaryData(*cache.mru().spectrum));
394 unit_assert_operator_equal(3, cache.lru().spectrum->index);
395 unit_assert(spectrumHasMetadata(*cache.lru().spectrum));
396 unit_assert(spectrumHasBinaryData(*cache.lru().spectrum));
397}
398
// Opens the named file from the example_data directory twice and checks that
// spectrum 3 obtained through 'cache' equals the one obtained directly from
// the second file's spectrum list, first without and then with binary data.
//
// filename: name of a file inside the example_data directory, which is
//           located relative to the path of this source file (__FILE__).
399void testFileReads(const char *filename) {
400 std::string srcparent(__FILE__); // locate test data relative to this source file
401 // something like \ProteoWizard\pwiz\pwiz\data\msdata\SpectrumListCacheTest.cpp
// keep everything before the last occurrence of "pwiz" in the path
// NOTE(review): rfind returns std::string::npos when "pwiz" does not occur in
// __FILE__; resize(npos) would then throw instead of giving a usable path.
402 size_t pos = srcparent.rfind("pwiz");
403 srcparent.resize(pos);
404 std::string example_data_dir = srcparent + "example_data/";
405 pwiz::msdata::MSDataFile msd1(example_data_dir + filename);
// NOTE(review): 'cache' is used below but its declaration is not present in
// this listing (the numbering jumps from 405 to 407); presumably it wraps
// msd1's spectrum list - confirm against the original file.
407 pwiz::msdata::MSDataFile msd2(example_data_dir + filename);
408 // test logic for efficient delayed read of binary data -
409 // we try to avoid reparsing the header since we have that cached
410 // mzML and mzXML readers can do this, others could probably be made to
411 int index = 3;
// first pass: request the spectrum without binary data from both sources
412 SpectrumPtr s=msd2.run.spectrumListPtr->spectrum(index, false);
413 SpectrumPtr c=cache.spectrum(index, false);
414 unit_assert(*s==*c);
415 unit_assert(!s->hasBinaryData());
416 unit_assert(!c->hasBinaryData());
// second pass: request the same spectrum again, this time with binary data
417 s=msd2.run.spectrumListPtr->spectrum(index, true);
418 c=cache.spectrum(index, true);
419 unit_assert(*s==*c);
420 unit_assert(s->hasBinaryData());
421 unit_assert(c->hasBinaryData());
// compare the first element of the first two binary data arrays
422 unit_assert(s->binaryDataArrayPtrs[0]->data[0]==
423 c->binaryDataArrayPtrs[0]->data[0]);
424 unit_assert(!s->binaryDataArrayPtrs[1]->data.empty());
425 unit_assert(!c->binaryDataArrayPtrs[1]->data.empty());
426 unit_assert(s->binaryDataArrayPtrs[1]->data[0]==
427 c->binaryDataArrayPtrs[1]->data[0]);
428}
429
430
// Runs the test cases of this file: the cache-off scenario followed by the
// file-based read checks on the three example data files.
// NOTE(review): the numbering jumps (432 to 434, 434 to 438) show that some
// statements are not present in this listing; presumably these were the calls
// to the other test functions named in the symbol index - confirm.
431void test()
432{
434 testModeOff();
438 // check the delayed-binary-read
439 // logic for mzML and mzXML readers
440 testFileReads("tiny.pwiz.mzXML");
441 testFileReads("tiny.pwiz.1.0.mzML");
442 testFileReads("tiny.pwiz.1.1.mzML");
443}
444
445
// Test program entry point. Passing "-v" as the first argument points os_ at
// cout so the tests print their diagnostics; any exception escaping test() is
// reported through TEST_FAILED.
446int main(int argc, char* argv[])
447{
448 TEST_PROLOG(argc, argv)
449
450 try
451 {
452 if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
453 test();
454 }
455 catch (exception& e)
456 {
457 TEST_FAILED(e.what())
458 }
459 catch (...)
460 {
461 TEST_FAILED("Caught unknown exception.")
462 }
463
// NOTE(review): line 464 is not present in this listing; the symbol index
// lists TEST_EPILOG, which is used nowhere else here, so presumably it belongs
// at this point - confirm against the original file.
465}
MemoryMRUCacheMode_MetaDataOnly
MemoryMRUCacheMode_BinaryDataOnly
MemoryMRUCacheMode_Off
void testFileReads(const char *filename)
int main(int argc, char *argv[])
bool spectrumHasBinaryData(const Spectrum &s)
void testModeMetaDataAndBinaryData()
void testMemoryMRUCache()
void testModeBinaryDataOnly()
SpectrumPtr makeSpectrumPtr(size_t index, const string &id)
bool spectrumHasMetadata(const Spectrum &s)
void testModeMetaDataOnly()
void testModeOff()
ostream * os_
void test()
MSData <-> MGF stream serialization.
void read(boost::shared_ptr< std::istream > is, MSData &msd) const
read in MSData object from an MGF istream note: istream may be managed by MSData's SpectrumList,...
void write(std::ostream &os, const MSData &msd, const pwiz::util::IterationListenerRegistry *iterationListenerRegistry=0) const
write MSData object to ostream as MGF; iterationListenerRegistry may be used to receive progress upda...
adds a level of flexible MRU caching to a SpectrumList processor chain
virtual SpectrumPtr spectrum(size_t index, bool getBinaryData=false) const
returns the requested spectrum which may or may not be cached depending on the current cache mode
CacheType & spectrumCache()
returns a reference to the cache, to enable clearing it or changing the mode
void push_back(const T &value)
bool empty() const
Definition mru_list.hpp:91
std::size_t max_size() const
Definition mru_list.hpp:93
const item_type & mru() const
Definition mru_list.hpp:96
std::size_t size() const
Definition mru_list.hpp:92
const item_type & lru() const
Definition mru_list.hpp:97
item_list::iterator iterator
Definition mru_list.hpp:63
bool insert(const item_type &item)
Definition mru_list.hpp:71
MS_ms_level
ms level: Stages of ms achieved in a multi stage mass spectrometry experiment.
Definition cv.hpp:2139
MS_MSn_spectrum
MSn spectrum: MSn refers to multi-stage MS2 experiments designed to record product ion spectra where ...
Definition cv.hpp:2364
MS_number_of_detector_counts
number of detector counts: The number of counted events observed in one or a group of elements of a d...
Definition cv.hpp:741
boost::shared_ptr< Spectrum > SpectrumPtr
Definition MSData.hpp:573
STL namespace.
ostream & operator<<(ostream &os, SpectrumListCache::CacheType &cache)
std::vector< ParamGroupPtr > paramGroupPtrs
a collection of references to ParamGroups
std::vector< CVParam > cvParams
a collection of controlled vocabulary terms
std::vector< UserParam > userParams
a collection of uncontrolled user terms
The structure into which encoded binary data goes. Byte ordering is always little endian (Intel style...
Definition MSData.hpp:405
pwiz::util::BinaryData< double > data
the binary data.
Definition MSData.hpp:410
MSData object plus file I/O.
This is the root element of ProteoWizard; it represents the mzML element, defined as: intended to cap...
Definition MSData.hpp:850
Run run
a run in mzML should correspond to a single, consecutive and coherent set of scans on an instrument.
Definition MSData.hpp:886
The method of precursor ion selection and activation.
Definition MSData.hpp:312
SpectrumListPtr spectrumListPtr
all mass spectra and the acquisitions underlying them are described and attached here....
Definition MSData.hpp:827
The structure that captures the generation of a peak list (including the underlying acquisitions)
Definition MSData.hpp:506
SourceFilePtr sourceFilePtr
this attribute can optionally reference the 'id' of the appropriate sourceFile.
Definition MSData.hpp:514
DataProcessingPtr dataProcessingPtr
this attribute can optionally reference the 'id' of the appropriate dataProcessing.
Definition MSData.hpp:511
bool hasBinaryData() const
returns true iff has nonnull and nonempty BinaryDataArrayPtr
Definition MSData.hpp:535
ScanList scanList
list of scans
Definition MSData.hpp:517
std::vector< Precursor > precursors
list and descriptions of precursors to the spectrum currently being described.
Definition MSData.hpp:520
a cache mapping spectrum indices to SpectrumPtrs
Simple writeable in-memory implementation of SpectrumList.
Definition MSData.hpp:717
#define unit_assert(x)
Definition unit.hpp:85
#define TEST_EPILOG
Definition unit.hpp:183
#define TEST_FAILED(x)
Definition unit.hpp:177
#define unit_assert_operator_equal(expected, actual)
Definition unit.hpp:92
#define TEST_PROLOG(argc, argv)
Definition unit.hpp:175