libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
sagereader.cpp
Go to the documentation of this file.
1/**
2 * \file input/sage/sagereader.cpp
3 * \date 21/08/2024
4 * \author Olivier Langella
5 * \brief read data files from Sage output
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2024 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of i2MassChroQ.
13 *
14 * i2MassChroQ is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * i2MassChroQ is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with i2MassChroQ. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
29#include "sagereader.h"
30#include <QJsonObject>
31#include <QJsonArray>
32#include <odsstream/tsvreader.h>
33#include <odsstream/odsexception.h>
34#include <QUrl>
35#include <qcontainerfwd.h>
36#include <qfileinfo.h>
37#include <qlogging.h>
38#include <qobject.h>
39#include "sagetsvhandler.h"
44
47 const pappso::cbor::psm::SageFileReader &sage_file_reader,
48 const QString &sage_json_file)
49 : m_sageFileReader(sage_file_reader)
50{
51 mp_monitor = p_monitor;
52 mp_cborWriter = p_output;
53 m_jsonAbsoluteFilePath = sage_json_file;
54}
55
59
60const QString &
65
66
67void
72
78
79void
81 const QString &sequence_in)
82{
83
84 QStringList description_split = description_in.split(" ", Qt::SkipEmptyParts);
85 QString accession = description_split.at(0);
86 try
87 {
88 const PsmProtein &psm_protein = mp_self->m_psmProteinMap.getByAccession(accession);
89 psm_protein.protein_sp.get()->setSequence(sequence_in);
90 psm_protein.protein_sp.get()->setDescription(description_in);
91 }
93 {
94 }
95 try
96 {
97 QString rev_accession = accession.prepend(m_decoyTag);
98 const PsmProtein &psm_protein = mp_self->m_psmProteinMap.getByAccession(rev_accession);
99 psm_protein.protein_sp.get()->setSequence(sequence_in);
100
101 description_split[0] = rev_accession;
102 psm_protein.protein_sp.get()->setDescription(description_split.join(" "));
103 psm_protein.protein_sp.get()->reverse();
104 }
105 catch(pappso::ExceptionNotFound &err)
106 {
107 }
108}
109
115
116
117void
119{
120
121 extractMzmlPathList(m_sageFileReader.getJsonDocument());
122 // getTsvFilePath(mp_identificationDataSource->getJsonDocument().object());
123 QString file_str = getTsvFilePath(m_sageFileReader.getJsonDocument());
124 QFileInfo tsv_file_info(file_str);
126 try
127 {
128 TsvReader tsv_reader(handler);
129
130 QFile tsv_file(tsv_file_info.absoluteFilePath());
131 tsv_reader.parse(tsv_file);
132 tsv_file.close();
133 }
134 catch(OdsException &error_ods)
135 {
136 throw pappso::PappsoException(QObject::tr("Error reading %1 file:\n %2")
137 .arg(tsv_file_info.absoluteFilePath())
138 .arg(error_ods.qwhat()));
139 }
140
141
142 // collect protein sequences
143 QFile fastaFile(getFastaFilePath(m_sageFileReader.getJsonDocument()));
144 SageReader::FastaSeq seq(this);
145 pappso::FastaReader reader(seq);
146 reader.parse(fastaFile);
147
148 qDebug();
149 mp_cborWriter->append("protein_map");
151
152
153 mp_cborWriter->append("sample_list");
154 mp_cborWriter->startArray();
155 try
156 {
157 handler.writeSampleList();
158 }
159 catch(OdsException &error_ods)
160 {
161 throw pappso::PappsoException(QObject::tr("Error reading %1 file:\n %2")
162 .arg(tsv_file_info.absoluteFilePath())
163 .arg(error_ods.qwhat()));
164 }
165
166 mp_cborWriter->endArray();
167}
168
169void
171{
172
173 QJsonObject sage_object = json_doc.object();
174 QJsonValue json_mzml_path_list = sage_object.value("mzml_paths");
175 if(json_mzml_path_list.isUndefined())
176 {
177 throw pappso::ExceptionNotFound(QObject::tr("mzml_paths not found in Sage json document"));
178 }
179 m_mzmlPathList.clear();
180
181 for(auto path_mzml : json_mzml_path_list.toArray())
182 {
183 m_mzmlPathList << convertToLocalFileOrDie(path_mzml.toString());
184 }
185}
186
187const QString &
188pappso::cbor::psm::SageReader::getMzmlPath(const QString &file_msrun) const
189{
190 for(auto &file_path : m_mzmlPathList)
191 {
192 if(file_path.endsWith(file_msrun))
193 return file_path;
194 }
196 QObject::tr("MS run %1 not found in Sage json document").append(file_msrun));
197}
198
199
200QString
202{
203 QString path;
204 QJsonObject sage_object = json_doc.object();
205 QJsonValue output_path = sage_object.value("output_paths");
206 if(output_path.isUndefined())
207 {
208 throw pappso::ExceptionNotFound(QObject::tr("output_paths not found in Sage json document"));
209 }
210
211 if(!output_path.isArray())
212 {
213 throw pappso::ExceptionNotFound(QObject::tr("output_paths is not an array"));
214 }
215 for(auto element : output_path.toArray())
216 {
217 if(element.isString())
218 {
219 if(element.toString().endsWith(".tsv"))
220 {
221 path = element.toString();
222 }
223 }
224 }
225
226 return convertToLocalFileOrDie(path);
227}
228
229QString
231{
232 QString path;
233 QJsonObject sage_object = json_doc.object();
234 QJsonValue database = sage_object.value("database");
235 if(database.isUndefined())
236 {
237 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
238 }
239 path = database.toObject().value("fasta").toString();
240 if(path.isEmpty())
241 {
242 throw pappso::ExceptionNotFound(QObject::tr("fasta value is empty"));
243 }
244
245
246 return convertToLocalFileOrDie(path);
247}
248
249QString
251{
252
253 // if we have an URL : convert it to local file
254 qDebug() << path;
255 if(path.startsWith("file:") || path.startsWith("http:") || path.startsWith("https:"))
256 {
257 QUrl tsv_url(path);
258 if(tsv_url.isValid())
259 {
260 qDebug() << "tsv_url.isValid()";
261 if(tsv_url.isLocalFile())
262 {
263 qDebug() << "tsv_url.isLocalFile()";
264 return tsv_url.toLocalFile();
265 }
266 else
267 {
269 QObject::tr("Unable to load data from remote URL %1").arg(tsv_url.toString()));
270 }
271 }
272 }
273 return path;
274}
275
276std::vector<pappso::cbor::psm::SageReader::SageModification>
278{
279 std::vector<SageReader::SageModification> list;
280 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
281 QJsonValue database = sage_object.value("database");
282 if(database.isUndefined())
283 {
284 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
285 }
286
287 QJsonValue static_mods = database.toObject().value("static_mods");
288 if(static_mods.isUndefined())
289 {
290 throw pappso::ExceptionNotFound(QObject::tr("static_mods not found in Sage json document"));
291 }
292 for(QString residue_str : static_mods.toObject().keys())
293 {
294 SageModification modif;
295 modif.residue = residue_str.at(0);
297 (Enums::AminoAcidChar)modif.residue.toLatin1(),
298 static_mods.toObject().value(residue_str).toDouble());
299 modif.strModification =
300 QString::number(static_mods.toObject().value(residue_str).toDouble(), 'f', 6);
301 if(modif.strModification.isEmpty())
302 {
303 throw pappso::PappsoException(QObject::tr(" modif.strModification is empty"));
304 }
305 if(modif.modification->getMass() < 0)
306 {
307 modif.strModification = QString("[%1]").arg(modif.strModification);
308 }
309 else
310 {
311 modif.strModification = QString("[+%1]").arg(modif.strModification);
312 }
313 list.push_back(modif);
314 }
315 return list;
316}
317
318std::vector<pappso::cbor::psm::SageReader::SageModification>
320{
321 std::vector<SageReader::SageModification> list;
322 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
323 QJsonValue database = sage_object.value("database");
324 if(database.isUndefined())
325 {
326 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
327 }
328
329 QJsonValue var_mods = database.toObject().value("variable_mods");
330 if(var_mods.isUndefined())
331 {
332 throw pappso::ExceptionNotFound(QObject::tr("static_mods not found in Sage json document"));
333 }
334 for(QString residue_str : var_mods.toObject().keys())
335 {
336 SageModification modif;
337 modif.residue = residue_str.at(0);
338 for(QJsonValue one_mass : var_mods.toObject().value(residue_str).toArray())
339 {
341 (Enums::AminoAcidChar)modif.residue.toLatin1(), one_mass.toDouble());
342 modif.strModification = QString::number(one_mass.toDouble(), 'f', 6);
343 if(modif.strModification.isEmpty())
344 {
345 throw pappso::PappsoException(QObject::tr(" modif.strModification is empty"));
346 }
347 if(modif.modification->getMass() < 0)
348 {
349 modif.strModification = QString("[%1]").arg(modif.strModification);
350 }
351 else
352 {
353 modif.strModification = QString("[+%1]").arg(modif.strModification);
354 }
355 list.push_back(modif);
356 }
357 }
358 return list;
359}
360
361QString
363{
364 QString path;
365 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
366 QJsonValue database = sage_object.value("database");
367 if(database.isUndefined())
368 {
369 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
370 }
371 path = database.toObject().value("decoy_tag").toString();
372 if(path.isEmpty())
373 {
374 throw pappso::ExceptionNotFound(QObject::tr("decoy_tag value is empty"));
375 }
376 return path;
377}
378
pappso_double getMass() const
void parse(QFile &fastaFile)
static AaModificationP guessAaModificationPbyMonoisotopicMassDelta(Enums::AminoAcidChar aa, pappso_double mass)
Definition utils.cpp:677
Overrides the QCborStreamWriter base class to provide convenience functions.
void setSequence(const QString &description_in, const QString &sequence_in) override
const QString & getMzmlPath(const QString &file_msrun) const
SageReader(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output, const SageFileReader &sage_file_reader, const QString &sage_json_file)
std::vector< SageModification > getStaticModificationList() const
pappso::cbor::CborStreamWriter * mp_cborWriter
Definition sagereader.h:100
const QString & getmJsonAbsoluteFilePath() const
void extractMzmlPathList(const QJsonDocument &json_doc)
pappso::cbor::CborStreamWriter & getCborStreamWriter() const
const SageFileReader & m_sageFileReader
Definition sagereader.h:98
std::vector< SageModification > getVariableModificationList() const
QString getFastaFilePath(const QJsonDocument &json_doc)
const SageFileReader & getSageFileReader() const
QString getTsvFilePath(const QJsonDocument &json_doc)
pappso::UiMonitorInterface * mp_monitor
Definition sagereader.h:99
QString convertToLocalFileOrDie(const QString &file_str) const
std::shared_ptr< Protein > protein_sp