#!/usr/bin/perl
#
#  script to assess ion libraries (DIALib-QC)
#
=for comment

Copyright (C) 2020 Moritz Lab, Institute for Systems Biology

DIALib-QC.1.2 program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.


You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
=cut


# Import libraries
use strict;
use Data::Dumper; 
use File::Basename;
use Getopt::Long;
use Text::ParseWords;

my $t0 = time();
print STDERR "Starting run\n";

# Test for presence of optional (non-core) regression module
my $has_regression = 1;
eval "use Statistics::Regression; 1" or $has_regression = 0;
if ( !$has_regression ) {
   print STDERR "Unable to load module Statistics::Regression, skipping RT correlation analysis\n";
}

# Global vars
my %opts = read_options();  # Command-line options
my %problem_assays;
##+
## Hash of known modifications
##-
my %known_mods = (
		'C[CAM]' => {'mz' => 57.0215, 'aa' => 'c'},
		'C(UniMod:4)' => {'mz' => 57.0215, 'aa' => 'c'},
		'C[+57]' => {'mz' => 57.0215, 'aa' => 'c'},
		'C[+57.0]' => {'mz' => 57.0215, 'aa' => 'c'},
		'C[57]' => {'mz' => 57.0215, 'aa' => 'c'},
		'C[Carbamidomethyl (C)]' => {'mz' => 57.0215, 'aa' => 'c'},
		'C[+C2+H3+N+O]' => {'mz' => 57.0215, 'aa' => 'c'},
		'C[160]' => {'mz' => 57.0215, 'aa' => 'c'},
		'C(UniMod:39)' => {'mz' => 45.987721 , 'aa' => 'c'}, 
		'C[149]' => {'mz' => 45.9877721, 'aa' => 'c'}, 
		'C[46]' => {'mz' => 45.987721, 'aa' => 'c'}, 
		'C[+46.0]' => {'mz' => 45.987721, 'aa' => 'c'}, 
		'C[Methylthio]' => {'mz' => 45.987721 , 'aa' => 'c'}, 
                'C[MSH]' => {'mz' => 45.987721 , 'aa' => 'c'},
		'C[PCm]' => {'mz' => 39.994915, 'aa' => 'c'}, 
		'C[143]' => {'mz' => 39.994915, 'aa' => 'c'}, 
		'C[40]' => {'mz' => 39.994915, 'aa' => 'c'}, 
		'C[+40]' => {'mz' => 39.994915, 'aa' => 'c'}, 
		'C[+40.0]' => {'mz' => 39.994915, 'aa' => 'c'},
		'C(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'c'}, 
		'L(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'l'}, 
		'A(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'a'},
		'D(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'd'},
		'V(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'v'},
		'F(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'f'},
		'Y(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'y'},
		'T(UniMod:35)' => {'mz' => 15.994915, 'aa' => 't'},
		'G(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'g'},
		'Q(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'q'},
		'N(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'n'},
		'I(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'i'},
		'E(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'e'},
		'S(UniMod:35)' => {'mz' => 15.994915, 'aa' => 's'}, 
		'C(UniMod:26)' => {'mz' => 39.994915, 'aa' => 'c'}, 
		'C[Pyro-Carbamidomethyl (Any N-term)]' => {'mz' => -17.026549, 'aa' => 'c'}, 
		'M[CAM]' => {'mz' => 57.0215, 'aa' => 'm'},
		'M(UniMod:4)' => {'mz' => 57.0215, 'aa' => 'm'},
		'M[+57]' => {'mz' => 57.0215, 'aa' => 'm'},
		'M[+57.0]' => {'mz' => 57.0215, 'aa' => 'm'},
		'M[57]' => {'mz' => 57.0215, 'aa' => 'm'},
		'M[Oxi]' => {'mz' => 15.994915, 'aa' => 'm'},
		'M[Oxidation (M)]' => {'mz' => 15.994915, 'aa' => 'm'},
		'M(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'm'},
		'M[147]' => {'mz' => 15.994915, 'aa' => 'm'},
		'M[+16]' => {'mz' => 15.994915, 'aa' => 'm'},
		'M[+O]' => {'mz' => 15.994915, 'aa' => 'm'},
		'M[+16.0]' => {'mz' => 15.994915, 'aa' => 'm'},
		'M[16]' => {'mz' => 15.994915, 'aa' => 'm'},
		'P[Oxi]' => {'mz' => 15.994915, 'aa' => 'p'},
		'P[Oxidation (P)]' => {'mz' => 15.994915, 'aa' => 'p'},
		'W[Oxi]' => {'mz' => 15.994915, 'aa' => 'w'},
		'W[Oxidation (W)]' => {'mz' => 15.994915, 'aa' => 'w'},
		'W(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'w'},
		'W[+16]' => {'mz' => 15.994915, 'aa' => 'w'},
		'W[+16.0]' => {'mz' => 15.994915, 'aa' => 'w'},
		'W[16]' => {'mz' => 15.994915, 'aa' => 'w'},
		'W[220]' => {'mz' => 15.9949, 'aa' => 'w'},
		'Q[Dea]' => {'mz' => 0.984016, 'aa' => 'q'},
		'R[Dea]' => {'mz' => 0.984016, 'aa' => 'r'},
		'T[Dhy]' => {'mz' => -18.010565, 'aa' => 't'},
		'S[Pho]' => {'mz' => 79.966331, 'aa' => 's'},
		'[-1R]' => {'mz' => -156.101111, 'aa' => 'x'},
		'N[dAm]' => {'mz' => -17.026549, 'aa' => 'n'},
		'[CRM]-' => {'mz' => 43.005814, 'aa' => 'x'},
		'D[Sud]' => {'mz' => 31.972071, 'aa' => 'd'},
		'S[UGG]' => {'mz' => 114.042927, 'aa' => 's'},
		'Y[Amn]' => {'mz' => 15.010899, 'aa' => 'y'},
		'D[Oxi]' => {'mz' => 15.994915, 'aa' => 'd'},
		'[-1K]' => {'mz' => -128.094963, 'aa' => 'x'},
		'S[PPE]' => {'mz' => 340.085794, 'aa' => 's'},
		'S[HNc]' => {'mz' => 203.079373, 'aa' => 's'},
		'[AAS]-' => {'mz' => 26.01565, 'aa' => 'x'},
		'M[CRM]' => {'mz' => 43.005814, 'aa' => 'm'},
		'F[1Br]' => {'mz' => 77.910511, 'aa' => 'f'},
		'S[1Me]' => {'mz' => 14.01565, 'aa' => 's'},
		'N[1NF]' => {'mz' => 349.137281, 'aa' => 'n'},
		'K[HNE]' => {'mz' => 156.11503, 'aa' => 'k'},
		'D[Cox]' => {'mz' => 43.989829, 'aa' => 'd'},
		'K[Oxi]' => {'mz' => 15.994915, 'aa' => 'k'},
		'T[HNc]' => {'mz' => 203.079373, 'aa' => 't'},
		'E[GPE]' => {'mz' => 197.04531, 'aa' => 'e'},
		'N[1K2]' => {'mz' => 730.264392, 'aa' => 'n'},
		'S[-2H]' => {'mz' => -2.01565, 'aa' => 's'},
		'[CuX]' => {'mz' => 61.921774, 'aa' => 'x'},
		'N[Oxi]' => {'mz' => 15.994915, 'aa' => 'n'},
		'R[Oxi]' => {'mz' => 15.994915, 'aa' => 'r'},
		'Y[Oxi]' => {'mz' => 15.994915, 'aa' => 'y'},
		'F[Oxi]' => {'mz' => 15.994915, 'aa' => 'f'},
		'D[CuX]' => {'mz' => 61.921774, 'aa' => 'd'},
		'K[3Me]' => {'mz' => 42.04695, 'aa' => 'k'},
		'D[1Me]' => {'mz' => 14.01565, 'aa' => 'd'},
		'L[1Me]' => {'mz' => 14.01565, 'aa' => 'l'},
		'R[1Me]' => {'mz' => 14.01565, 'aa' => 'r'},
		'E[1Me]' => {'mz' => 14.01565, 'aa' => 'e'},
		'H[1Me]' => {'mz' => 14.01565, 'aa' => 'h'},
		'Q[1Me]' => {'mz' => 14.01565, 'aa' => 'q'},
		'R[2Me]' => {'mz' => 28.0313, 'aa' => 'r'},
		'P[2Ox]' => {'mz' => 31.989829, 'aa' => 'p'},
		'Y[2Ox]' => {'mz' => 31.989829, 'aa' => 'y'},
		'H[CAM]' => {'mz' => 57.0215, 'aa' => 'h'},
		'Y[Dhy]' => {'mz' => -18.010565, 'aa' => 'y'},
		'N[K2F]' => {'mz' => 876.322301, 'aa' => 'n'},
		'Y[-2H]' => {'mz' => -2.01565, 'aa' => 'y'},
		'D[MSH]' => {'mz' => 45.987721, 'aa' => 'd'},
		'T[NHN]' => {'mz' => 947.323029, 'aa' => 't'},
		'S[GCn]' => {'mz' => 176.032088, 'aa' => 's'},
		'K[OH2]' => {'mz' => 340.100562, 'aa' => 'k'},
		'N[G2d]' => {'mz' => 1216.422863, 'aa' => 'n'},
		'T[1Ac]' => {'mz' => 42.010565, 'aa' => 't'},
		'S[1Ac]' => {'mz' => 42.010565, 'aa' => 's'},
		'[Myr]-' => {'mz' => 210.198366, 'aa' => 'x'},
		# 'Y[Dhy]' => {'mz' => -18.010565, 'aa' => 'y'},#EXAMPLE LINE TO ADD MODIFICATION
		'Q(UniMod:7)' => {'mz' => 0.984016, 'aa' => 'q'}, 
		'H(UniMod:35)' => {'mz' => 15.994915, 'aa' => 'h'},
		# 'U' => {'mz' => 150.95363, 'aa' => 'u'}, #U = selenocysteine 
		'[CAM]-' => {'mz' => 57.0215, 'aa' => 'x'},
		'D[CAM]' => {'mz' => 57.0215, 'aa' => 'd'},
		'S[CAM]' => {'mz' => 57.0215, 'aa' => 's'},
		'E[CAM]' => {'mz' => 57.0215, 'aa' => 'e'},
		'T[CAM]' => {'mz' => 57.0215, 'aa' => 't'},
		'Y[CAM]' => {'mz' => 57.0215, 'aa' => 'y'},
		'K[CAM]' => {'mz' => 57.0215, 'aa' => 'k'},
		'D[NaX]' => {'mz' => 21.981943, 'aa' => 'd'},
		'E[NaX]' => {'mz' => 21.981943, 'aa' => 'e'},
		'E[KXX]' => {'mz' => 37.955882, 'aa' => 'e'},
		'[Ami]' => {'mz' => -0.984016, 'aa' => 'x'},
		'K[Frm]' => {'mz' => 27.994915, 'aa' => 'k'},
		'[Frm]-' => {'mz' => 27.994915, 'aa' => 'x'},
		'F[2Ox]' => {'mz' => 31.989829, 'aa' => 'f'},
		'M[2Ox]' => {'mz' => 31.989829, 'aa' => 'm'},
		'W[2Ox]' => {'mz' => 31.989829, 'aa' => 'w'},
		'S[Dhy]' => {'mz' => -18.010565, 'aa' => 's'},
		'[+1R]-' => {'mz' => 156.101111, 'aa' => 'x'},
		'T[-2H]' => {'mz' => -2.01565, 'aa' => 't'},
		'[-2H]' => {'mz' => -2.01565, 'aa' => 'x'},
		'D[KXX]' => {'mz' => 37.955882, 'aa' => 'd'},
		'D[Dhy]' => {'mz' => -18.010565, 'aa' => 'd'},
		'E[Dhy]' => {'mz' => -18.010565, 'aa' => 'e'},
		'K[AAR]' => {'mz' => 28.0313, 'aa' => 'k'},
		'W[Kyn]' => {'mz' => 3.994915, 'aa' => 'w'},
		'E[CuX]' => {'mz' => 61.921774, 'aa' => 'e'},
		'[+1K]-' => {'mz' => 128.094963, 'aa' => 'x'},
		'M[DTM]' => {'mz' => -48.003371, 'aa' => 'm'},
		'R[CRM]' => {'mz' => 43.005814, 'aa' => 'r'},
		'H[Oxi]' => {'mz' => 15.994915, 'aa' => 'h'},
		'K[AAS]' => {'mz' => 26.01565, 'aa' => 'k'}, 
		'H[AAS]' => {'mz' => 26.01565, 'aa' => 'h'}, 
		'N[Dea]' => {'mz' => 0.984016, 'aa' => 'n'},
		'W[HKy]' => {'mz' => 19.989829, 'aa' => 'w'},
		'D[2CM]' => {'mz' => 114.0429274, 'aa' => 'd'},
		'R[AGA]' => {'mz' => -43.053433, 'aa' => 'r'},
		'R[Orn]' => {'mz' => -42.021798, 'aa' => 'r'},
		'[NaX]' => {'mz' => 21.981943, 'aa' => 'x'},
		'P[PYD]' => {'mz' => -30.010565, 'aa' => 'p'},
		'N[MDe]' => {'mz' => 14.999666, 'aa' => 'n'},
		'N[Deamidation (N)]' => {'mz' => 0.984016, 'aa' => 'n'},
		'N[Deamidation (NQ)]' => {'mz' => 0.984016, 'aa' => 'n'},
		'N(UniMod:7)' => {'mz' => 0.984016, 'aa' => 'n'},
		'N[115]' => {'mz' => 0.984016, 'aa' => 'n'},
		'N[+1.0]' => {'mz' => 0.984016, 'aa' => 'n'},
		'N[1]' => {'mz' => 0.984016, 'aa' => 'n'}, 
		'E[PGE]' => {'mz' => -18.010565, 'aa' => 'e'},
		'[PGE]-' => {'mz' => -18.010565, 'aa' => 'x'},
		'P[PGP]' => {'mz' => 13.979265, 'aa' => 'p'},
		'[AAR]-' => {'mz' => 28.0313, 'aa' => 'x'},
		'K[CRM]' => {'mz' => 43.005814, 'aa' => 'k'},
		'E[111]' => {'mz' => -18.010565, 'aa' => 'e'},
		'E(UniMod:27)' => {'mz' => -18.010565, 'aa' => 'e'}, 
		'E[-18]' => {'mz' => -18.010565, 'aa' => 'e'},
		'E[-18.0]' => {'mz' => -18.010565, 'aa' => 'e'},
		'E[Glu->pyro-Glu (Any N-term)]' => {'mz' => -18.010565, 'aa' => 'e'},
		'E[Glu->pyro-Glu]' => {'mz' => -18.010565, 'aa' => 'e'},
		'Q[PGQ]' => {'mz' => -17.026549, 'aa' => 'q'},
		'[PGQ]-' => {'mz' => -17.026549, 'aa' => 'x'},
		'Q[111]' => {'mz' => -17.026549, 'aa' => 'q'},
		'Q(UniMod:28)' => {'mz' => -17.026549, 'aa' => 'q'}, 
		'(UniMod:28)' => {'mz' => -17.026549, 'aa' => 'x'}, 
		'Q[-17]' => {'mz' => -17.026549, 'aa' => 'q'},
		'Q[-17.0]' => {'mz' => -17.026549, 'aa' => 'q'},
		'Q[Glu pyro-Glu (Any N-term)]' => {'mz' => -17.026549, 'aa' => 'q'}, 
		'Q[Glu pyro-Glu]' => {'mz' => -17.026549, 'aa' => 'q'}, 
		'Q[Dea]' => {'mz' => 0.984016, 'aa' => 'q'}, 
		'Q[Deamidation (NQ)]' => {'mz' => 0.984016, 'aa' => 'q'},
		'[42]' => {'mz' => 42.010565, 'aa' => 'x'},
		'[42.0]' => {'mz' => 42.010565, 'aa' => 'x'},
		'[1Ac]-' => {'mz' => 42.010565, 'aa' => 'x'},
		'(UniMod:1)' => {'mz' => 42.010565, 'aa' => 'x'},
		'[Acetyl +(N-term)]' => {'mz' => 42.010565, 'aa' => 'x'},
		'[Acetyl +(Any N-term)]' => {'mz' => 42.010565, 'aa' => 'x'},
		'[Acetyl +(Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'x'},
                '[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'x'},
		'M[Acetyl +(Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'm'},
		'K[Acetyl (K)]' => {'mz' => 42.010565, 'aa' => 'k'},
		'R[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'r'},
		'A[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'a'},
		'S[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 's'},
		'D[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'd'},
		'M[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'm'},
		'T[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 't'},
		'V[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'v'},
		'E[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'e'},
		'G[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'g'},
		'P[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'p'},
		'K[Acetyl (Protein N-term)]' => {'mz' => 42.010565, 'aa' => 'k'},
              );
my %mods_current;
my $nterm_mod;
my $cterm_mod;
my $LTOS;	#LossTypeOpenSwath;

# Variables for SWATH bin analysis
my %swaths;
my %swadefs;
my @swaths;

# Constants
use constant WATER_MASS => 18.010565;
use constant H_MASS => 1.00727646688;

if ( $opts{swath_file} ) {
  open SWA, $opts{swath_file};
  while ( my $line = <SWA> ) {
    chomp $line;
    my @line = split( /\s+/, $line );
    next unless $line[0] =~ /\d/; 
    next unless $line[1] =~ /\d/; 
    $swadefs{$line[0]} = $line[1];
  }
  close SWA;
}
if ( $opts{swath_file} ) {
  @swaths = ( qw( swa_defined swa_missing swa_conflict swa_ok swa_conflict_assay swa_5 swa_25 ) );
}

my %stats = ( mc => 0 ); # Hash of output values
# Initialize stats hash
for my $key ( qw( totpeps libpeps speps mc libprots sprots mprots precursor_ok precursor_bad fragment_ok fragment_bad fragment_na ), @swaths ) {
  $stats{$key} = 0;
}

# ppeps is a peptide/protein mapping file from reference DB; pr are proteotypic
my %ppeps = ( prots => {}, peps => {}, pr => {} );

# read in opional peptide digest file
if ( $opts{peptide_file} ) {
  print STDERR "File $opts{peptide_file} does not exist\n" if ! -e $opts{peptide_file};

  open PEPS, $opts{peptide_file}|| die;
  while ( my $line = <PEPS> ) {
    chomp $line;
    my @line = split( /\t/, $line );
    $ppeps{peps}->{$line[1]}++;
    if ( $line[0] !~ /,/ ) {
      $ppeps{pr}->{$line[0]}++;
    }
    for my $prot ( split( /,/, $line[0] ) ) {
      $ppeps{prots}->{$prot}++;
    }
  }
  close PEPS;
}
$stats{libpeps} = scalar( keys( %{$ppeps{peps}} ) );
$stats{libprots} = scalar( keys( %{$ppeps{prots}} ) );
$stats{libprprots} = scalar( keys( %{$ppeps{pr}} ) ); # proteins with at least one proteotypic peptide

my $libname = basename( $opts{ion_library} );
$libname =~ s/\.tsv$//;
$libname =~ s/\.csv$//;

# Open and read ion library file
open ILIB, $opts{ion_library}|| die;
my $head = 1;
my $type;

my $dta_dir = $libname . '_dta';
$dta_dir =~ s/\./_/g;
my $write_data = $opts{write} || 0;
if ( $write_data ) {
  if ( -e "$libname.mgf" ) {
    print "MGF file exists, exiting\n";
    exit;
  }
  open MGF, ">$libname.mgf" || die;
}

my $curr;
my %dta = ( frg => {}, pre => {z => '', mz => ''} );
my $pepkey;

# hash of all accessions recorded in ion library
my %prots;

my %peps = ( sing => {}, 
             mult => {},
             pre_z => {}, # precursor charge
             frg_z => {}, # fragment charge
             frg_n => {}, # fragment number
             frg_s => {}, # tally fragment series
             frg_i => { cnt => 0, inten => 0 }, # Summed intensity of top 5 
             ); 

my %rt = ( mseqs => {}, iRT => {} );

my $irt_cnt = 0;
my %is_irt;
for my $irt ( qw( ADVTPADFSEWSK DGLDAASYYAPVR GAGSSEPVTGLDAK GTFIIDPAAVIR GTFIIDPGGVIR LFLQFGAQGSPFLK LGGNEQVTR TPVISGGPYEYR TPVITGAPYEYR VEATFGVDESNAK YILAGVENSK ) ) {
  $is_irt{$irt}++;
}

# Hash of recognized neutral losses to their masses.
my %losses = ( 'H2O' => 18.010565,
               '1(+C+H4+O+S)' => 64.107989,
               'NH3' => 17.026549,
               'noloss' => 0,
	       '-17' => 17.026549,
               '-18' => 18.010565,
	       '-64' => 64.107989
); 

my %decoy = ( mixed => 0, decoy => 0, fwd => 0);
my %extrema = ( precursor_min => 10000, fragment_min => 10000, precursor_max => 0, fragment_max => 0 );

if ( $opts{print_mass_error} ) {
  my $me = $opts{output_file} . ".mass_error";
  if ( -e $me ) { 
    print STDERR "mass error file $me exists, cannot overwrite\n";
    exit;
  }
  open ME, ">$me";
}


my %cmap;            # Map of column headings to number (nth column)
my %idx;             # Map of target information to column index
my %massmap;  
my %precursorstats;  
my %ion_cnt;
my %tmax;
my $osw_altmod = 0;
my $is_CSV;
my $quote_char = '';
my @imarray;
my @imsort;
while ( my $line = <ILIB> ) {
  chomp $line;
  my @line = parse_line( $line );
  if ( !defined $is_CSV ) {
    if ( scalar( @line ) < 5 ) {
      @line = parse_csv( $line );
      if ( scalar( @line ) < 5 ) {
        print STDERR "Unknown file format, must be either TSV or CSV\n";
        exit 256;
      } else {
        $is_CSV++;
        print STDERR "CSV format detected\n";
        if ( $line =~ /^(["']).*["']\s*$/ ) {
          $quote_char = $1;
          print STDERR "Quote character is $quote_char\n";
        }
      }
    } else {
      $is_CSV = 0;
    }
  }

  if ( $head ) {
    my $idx = 0;
    for my $col ( @line ) {
      $cmap{$col} = $idx++;
    }
    if ( !$line[1] ) {
      print STDERR "Error: $ARGV[0] format invalid\n";
    } elsif ( $line =~ /[Q1 q1]/ && ($line =~ /modification_sequence/ || $line =~ /peptideModSeq/)) {
      $type = 'pv';
      print STDERR "Peakview library detected\n";
    } elsif ( $line =~ /PrecursorMz/ ) {
      if ( $line =~ /transition_group_id/ ) {
        print STDERR "OpenSWATH library detected\n";
        $type = 'os';
      } elsif ( $line =~ /iRT Value/ || $line =~ /ReferenceRun/ ) {
        print STDERR "Spectronaut library detected\n";
        $type = 'sn';
      } elsif ( $line =~ /FragmentLossType/ ) {
        print STDERR "Prosit/Fusion library format detected\n";
        $type = 'pf'; # 'Generic'
      } else {
       print STDERR "Error: $ARGV[0] format unknown\n";
      }
      if ( $type eq 'os' ) {
        if ( !defined $cmap{FullUniModPeptideName} ) {
          $cmap{FullUniModPeptideName} = $cmap{FullPeptideName};
        }
        if ( !defined $cmap{PrecursorCharge} ) {
          $cmap{PrecursorCharge} = $cmap{Charge};
        }
        if ( !defined $cmap{Tr_recalibrated} ) {
          $cmap{Tr_recalibrated} = $cmap{RetentionTime};
        }
        die "Can't find peptide name" if !defined $cmap{FullUniModPeptideName};
      }
    } else {
      print STDERR "Error: $ARGV[0] format invalid \n";
    }
    exit unless $type;
    $head = 0;

    # Modifed to fetch indexes because of perl default array indexing could hide issues.
    # find_index gets up to 3 args: arrayref of possible headings, checked in order, common
    # name of item being sought (for debug messages), and (opt) whether it is OK to have null
    $idx{frg_z} = find_index( [qw(frg_z FragmentCharge ProductCharge)], 'Fragment ion charge', 1); 
    $idx{ion_s} = find_index( ['frg_type','FragmentType','FragmentIonType'], 'Ion series', 1 ); 
    $idx{ion_n} = find_index( ['frg_nr','FragmentNumber','FragmentSeriesNumber'], 'Ion series number', 1); 
    $idx{mseq} = find_index( ['ModifiedPeptideSequence','modification_sequence','FullUniModPeptideName', 'PeptideModifiedSequence', 'ModifiedPeptide','IntModifiedPeptide' , 'peptideModSeq' ], 'Modified peptide sequence' ); 
    $idx{seq} = find_index( ['stripped_sequence','PeptideSequence', 'StrippedPeptide', 'peptide_sequence'], 'Stripped peptide sequence' ); 
    $idx{precursor} = find_index( ['Q1','PrecursorMz','q1'], 'Precursor m/z' ); 
    $idx{fragment} = find_index( ['Q3','ProductMz', 'FragmentMz','q3'], 'Fragment m/z' ); 
    $idx{rt} = find_index( ['NormalizedRetentionTime','RT_detected','Tr_recalibrated', 'iRT Value', 'iRT', 'rt_detected', 'irt'], 'Retention time' ); 
    $idx{pre_z} = find_index( [qw(prec_z PrecursorCharge)], 'Precursor ion charge' ); 
    # Missing value OK for these three
    $idx{protstr} = find_index( ['uniprot_id','protein_name','ProteinName','Protein Name','ProteinId', 'UniprotID', 'UniProtIds' ],'Protein name(s)',1 ); 
    $idx{decoy} = find_index( ['decoy'], 'Decoy',1 ); 
    $idx{lossType} = find_index( [qw( FragmentLossType ) ], 'Fragment neutral loss', 1 ); 
    $idx{transition_name} = find_index( ['transition_name'], 'transition_name',1 );
    $idx{IonMobility} = find_index( ['IonMobility','PrecursorIonMobility'], 'Ion Mobility',1 );
    next;
  }

  # OS decoy skipping column triage
  if ( $opts{skip_decoys} ) {
    if ( $type eq 'os' ) {
      next if !defined $idx{decoy};
      if ( $line[$idx{decoy}] eq 'TRUE' || !$line[$idx{decoy}] ) {
        next;
      }
    }
  }

  ##  Extract assay-related data based on library type Decoy_9_y9_1_AAAAAAAAAAGAAGGR_2 or 9_y9_1_AAAAAAAAAAGAAGGR_2

  # Precursor charge
  my $pre_z = $line[$idx{pre_z}];
  $peps{pre_z}->{$pre_z}++;

  my $frg_z; my $ion_n; my $ion_s;
  # Fragment charge
  if($idx{frg_z}){
  	$frg_z = $line[$idx{frg_z}];
  }
  # Ion series
  if($idx{ion_s}){
	$ion_s = $line[$idx{ion_s}];
	if($ion_s =~ /-/){
		if ($ion_s =~ /(-17)/)  {
			$LTOS = '-17';
		} elsif ($ion_s =~ /(-18)/)  {
			$LTOS = '-18';
		} elsif ($ion_s =~ /(-64)/)  {
			$LTOS = '-64';
		}
			
	$ion_s =~ s/-17//g;
	$ion_s =~ s/-18//g;
	$ion_s =~ s/-64//g; #get rid of the -17, -18, -64  you get y10, y16, y10
	}else {
		$LTOS = '';
	}
  }

  # Ion series number
  if($idx{ion_n}){
	$ion_n = $line[$idx{ion_n}];
  }
  if (!defined $frg_z || !defined $ion_s || !defined $ion_n){
	if($idx{transition_name}){
		my $tr_name = $line[$idx{transition_name}];
		$tr_name =~ s/"DECOY_"//g; 
		my ( @tr_array ) = split( /_/, $tr_name );
		if (!defined $frg_z){
			# Fragment charge
			$frg_z = $tr_array[2];
		}
                # Ion series
                my $ion_sno = $tr_array[1];
                ##++++ 
                #OpenSwath library has y-1710, y-1816, y-6410 neutral losses for HN3, Water and oxidized methionine
                # so get rid of the -17, -18, -64 and consider these neutral losses in calculations.
                if($ion_sno =~ /-/){
			if ($ion_sno =~ /("-17")/)  {
				$LTOS = '-17';
			} elsif ($ion_sno =~ /("-18")/)  {
				$LTOS = '-18';
			} elsif ($ion_sno =~ /("-64")/)  {
				$LTOS = '-64';
			}
			$ion_sno =~ s/-17//g;
			$ion_sno =~ s/-18//g;
			$ion_sno =~ s/-64//g; #get rid of the -17, -18, -64  you get y10, y16, y10
		}else {
				$LTOS = '';
			}
		if (!defined $ion_s){
			if ($ion_sno =~ /([b y])/){			
				$ion_s = $1;
			}
		}
		
		if (!defined $ion_n){
			$ion_sno =~ s/[b y]//g;
			$ion_n = $ion_sno;
		}
	}
   }
      
   $peps{frg_z}->{$frg_z}++;
   $peps{frg_s}->{$ion_s}++;
   $peps{frg_n}->{$ion_n}++;		

  # Modified sequence
  my $mseq = $line[$idx{mseq}];
  $mseq =~ s/^_//g;
  $mseq =~ s/_$//g;

  # Stripped sequence
  my $seq = $line[$idx{seq}];

  # Retention time
  my $rt = $line[$idx{rt}];

 # Ion Mobility
  my $im = $line[$idx{IonMobility}];
  push (@imarray, $im);


  # Fragment m/z
  my $fragment = $line[$idx{fragment}];

  #Precursor m/z
  my $precursor = $line[$idx{precursor}];
  $precursorstats{$precursor}++;

  # Protein string (may be multiple).
  my $protstr = $line[$idx{protstr}];
  if ( !defined $idx{protstr} ) {
    $protstr = 'Default';
  }

  my $lossType = ( defined $idx{lossType} ) ? $line[$idx{lossType}] : '';
  if ( $lossType && !defined( $losses{$lossType} ) ) {
    print STDERR "unknown loss type $lossType specified, known values include:\n";
    for my $ls ( sort( keys( %losses ) ) ) {
      print STDERR join( "\t", $ls, $losses{$ls} ) . "\n";
    }
    exit 144;
  }

  if ( $opts{debug} ) {
    print STDERR "Dumping parsed values";
    print STDERR qq~
protstr: $protstr
precursor: $precursor
fragment: $fragment
rt: $rt
seq: $seq
mseq: $mseq
pre_z: $pre_z
frg_s: $ion_s
frg_n: $ion_n
frg_z: $frg_z

~;

    print STDERR "Dumping column mappings\n";
    for my $f ( sort( keys( %cmap ) ) ) {
      print STDERR join( "\t", $cmap{$f}, $f, $line[$cmap{$f}] ) . "\n"; 
    }
    exit;
  }


  # Mod seq + charge
  $pepkey = $mseq . '_' . $pre_z;
  $ion_cnt{$pepkey}++;

  if ( $type ne 'pv' && !$osw_altmod ) {
    $osw_altmod++ if $mseq =~ /\[\d+\]/;
  }

  # Do some clean-up on protein string (defined above)
  $protstr =~ s/\"//g;
  $protstr =~ s/^\d+\/*//;
  $protstr ||= 'DEFAULT';
  if ( $type eq 'os' ) {
    $protstr =~ s/;/,/g;
  }

  my @prot;
  if ( $protstr =~ /,/ ) {
    @prot = split( /,/, $protstr, -1 );
  } else {
    @prot = split( /\//, $protstr, -1 );
  }

# fragment, Q3 plus intensity 
#  my $fragkey = "$line[1] $line[5]";
#  my $fragkey = ( $type eq 'pv' ) ? "$line[1] $line[5]" : "$line[$cmap{ProductMz}] $line[$cmap{}]";
#  my $ion_s = ( $type eq 'pv' ) ? "$line[9]" : "$line[15]";

  $peps{t6_frg_s} ||= {};
  $peps{t6_frg_s}->{$ion_s}++ unless $ion_cnt{$pepkey} > 6;

  if ( $fragment > $precursor ) {
    $stats{fragment_above_precursor}++;
  }
  $stats{fragment}++;

  my $swakey = $pepkey;
  if ( $opts{swath_file} ) {
    if ( !$swaths{$swakey} ) {
      $swaths{$swakey} = [];
      for my $min ( keys( %swadefs ) ) {
        if ( $precursor >= $min && $precursor <= $swadefs{$min} ) {
          push @{$swaths{$swakey}}, { min => $min, max => $swadefs{$min} };
        }
      }
      if ( !scalar( @{$swaths{$swakey}} ) ) {
        $stats{swa_missing}++; 
        $problem_assays{$pepkey}++;
        next;
      } else {
        $stats{swa_defined}++ 
      }
    }
    my $is_bad = 0;
    for my $swa ( @{$swaths{$swakey}} ) {
      if ( $fragment >= $swa->{min} && $fragment <= $swa->{max} ) {
        print STDERR "$fragment ($precursor) in CONFLICT (suppressing further warnings)\n" if $opts{verbose} && !$stats{swa_conflict}; # Complain, but just once
        $stats{swa_conflict}++;
        $problem_assays{$pepkey}++;
        $is_bad++;
      } else {
        $stats{swa_ok}++;
      }
    }
    $stats{swa_conflict_assay} ||= {};
    $stats{swa_conflict_assay}->{$precursor}++ if $is_bad;
#    $stats{swa_conflict_precursor}++ if $is_bad;
    if ( abs( $precursor - $fragment ) <= 5 ) {
      $stats{swa_5}++;
    } elsif ( abs( $precursor - $fragment ) <= 25 ) {
      $stats{swa_25}++;
    }
  }

# Some hand-edited libraries might have blank lines...
  next if $line =~ /^\s*$/;

  if ( !defined $fragment || !defined $precursor ) {
    print STDERR "Unable to find prec a/o fragment ($precursor and $fragment) from $line\n";
    die;
  }

  $extrema{precursor_max} = $precursor if $precursor > $extrema{precursor_max};
  $extrema{fragment_max} = $fragment if $fragment > $extrema{fragment_max};
  $extrema{precursor_min} = $precursor if $precursor < $extrema{precursor_min};
  $extrema{fragment_min} = $fragment if $fragment < $extrema{fragment_min};


  if ( $curr && $curr ne $pepkey ) {

    my $fcnt = 0;
    for my $mz ( sort { $a <=> $b }( keys( %{$dta{frg}} ) ) ) {
      $peps{frg_i}->{cnt}++;
      $peps{frg_i}->{inten} += $dta{frg}->{$mz};
      last if $fcnt++ >= 5;
    }

    if ( $write_data ) {
      my $mw = sprintf( "%0.4f", $dta{pre}->{mz}/$dta{pre}->{z} );
      print MGF qq~BEGIN IONS
PEPMASS=$mw
CHARGE=$dta{pre}->{z}
TITLE=$pepkey
~;
      for my $mz ( sort { $a <=> $b }( keys( %{$dta{frg}} ) ) ) {
        print MGF join( "\t", $mz, $dta{frg}->{$mz} ) . "\n";
      }
      print MGF "END IONS\n";
    }
    if ( scalar(keys(%{$dta{frg}})) > 5 ) {
      $stats{frg_cnt_ok}++;
    } else {
      $stats{frg_cnt_short}++;
    }
    %dta = ( frg => {}, pre => {z => '', mz => ''} );
  }
  $curr = $pepkey;
  $dta{pre}->{mz} = $precursor;
  $dta{pre}->{z} = $pre_z;
  my $mz = sprintf( "%0.4f", $line[1]);
  my $int = $line[5];
  $dta{frg}->{$mz} = $int;

  $tmax{$pepkey} ||= { idx => 1,
                       max => 0,
                       max_idx => 1 };
  if ( $int > $tmax{$pepkey}->{max} ) {
    $tmax{$pepkey}->{max} = $int; 
    $tmax{$pepkey}->{max_idx} = $tmax{$pepkey}->{idx}; 
  }
  $tmax{$pepkey}->{idx}++;

  if ( $opts{assess_massdiff} ) {
    my $masskey = sprintf( "%0.4f", $precursor );
    $massmap{$pepkey} ||= {};
    my $skip = 0;
    my ( $pseq, $pchg ) = split( /_/, $pepkey );
    my $eseq = encode_modifications( $pseq );
#    print STDERR "$pseq becomes $eseq \n" if $pseq ne $eseq;
    my %ion2mz; 
    if ( $massmap{$pepkey}->{$masskey} ) {
      if ( !$massmap{$pepkey}->{$masskey}->{precursor_ok} ) {
        $problem_assays{$pepkey}++;
        
#        Misleading because known fragments labeled as na - revisit if performance becomes an issue
#        # Issue with this precursor already documented, no use checking fragment
#        $skip++ unless $opts{correct_mz};
      }
    } else {

      my $pmz = get_peptide_mass( $eseq, $pchg, 1 );
      my $ions = generate_ions( $pseq );
      #print "IONHASH: $pseq\t";
      #print Dumper(\$ions);
      for ( my $fchg = 1; $fchg <= 7; $fchg++ ) {    ##7 is maximum fragment charge we consider usually it is upto 3
        for my $yion ( keys( %{$ions->{y}} ) ) {
          my $ion_key = 'y' . $yion . '_' . $fchg;
          my $intermediatey = encode_modifications($ions->{y}->{$yion});
          $ion2mz{$ion_key} = get_peptide_mass($intermediatey, $fchg );  
        }
        for my $bion ( keys( %{$ions->{b}} ) ) {
          my $ion_key = 'b' . $bion . '_' . $fchg;
          my $intermediateb = encode_modifications($ions->{b}->{$bion});
          $ion2mz{$ion_key} = get_peptide_mass( $intermediateb, $fchg ) - (WATER_MASS)/$fchg;
         #  print "\n1.Gen ionkey $ion_key\t $ion2mz{$ion_key}, $ions->{b}->{$bion} seq: $intermediateb,bion  fchg $fchg\n";
      }    
      }

      my $delta = abs( $precursor - $pmz );
      if ( $delta ) {
        my $dawn = $delta * 200000;
        if ( $dawn > $precursor - 0 ) { # More than 5ppm different
          $massmap{$pepkey}->{$masskey}->{precursor_ok} = 0;
          $problem_assays{$pepkey}++;
          if ( $opts{print_mass_error} ) {
             print ME join( "\t", "Precursor", $pepkey, $eseq, $pchg, $precursor, $pmz, $delta, $dawn ) . "\n";
          }
          print STDERR "$pepkey has precursor error with $delta from $pmz and $precursor\n" if $opts{verbose};
          $stats{precursor_bad}++;
        
#        Misleading because known fragments labeled as na - revisit if performance becomes an issue
#        # Issue with this precursor already documented, no use checking fragment
#        $skip++ unless $opts{correct_mz};
        } else {
          $massmap{$pepkey}->{$masskey}->{precursor_ok} = 1;
          $stats{precursor_ok}++;
        }
      } else {
        $massmap{$pepkey}->{$masskey}->{precursor_ok} = 1;
        $stats{precursor_ok}++;
      }
    }
    unless ( $skip ) {
      if ( !defined $massmap{$pepkey}->{$masskey}->{peaks} ) {
        $massmap{$pepkey}->{$masskey}->{peaks} = \%ion2mz;
      }
    }
    my $peak_key = $ion_s . $ion_n . '_' . $frg_z;
#    if ( $massmap{$pepkey}->{$masskey}->{peaks}->{$ion_s . $ion_n . '_' . $frg_z} ) {
    if ( $massmap{$pepkey}->{$masskey}->{peaks}->{$peak_key} ) {
      my $tmz = $massmap{$pepkey}->{$masskey}->{peaks}->{$peak_key};
      if ( $lossType ) {
        $tmz -= $losses{$lossType}/$frg_z;
      }
      if ( $LTOS ) { 
        $tmz -= $losses{$LTOS}/$frg_z;
      }

      my $tdelta = abs( $tmz - $fragment );
      $stats{delta_cnt}++;
      $stats{delta_sum} += $tdelta;
      if ( $tdelta ) {
        if ( $tdelta * 1000000 > $fragment ) { # More than 1ppm different
          $stats{fragment_bad}++;
          $problem_assays{$pepkey}++;
          if ( $opts{print_mass_error} ) {
             print ME join( "\t", "Fragment", $pepkey, $ion_s . $ion_n . '_' . $frg_z, $tmz, $fragment, $tdelta, $lossType ) . "\n";
          }
        } else {
          $stats{fragment_ok}++;
        }
      } else {
        $stats{fragment_ok}++;
      }
    } else {
      print STDERR "Can't find $peak_key for $pepkey and $masskey\n";
#      die Dumper( $massmap{$pepkey}->{$masskey}->{peaks} );

      $stats{fragment_na}++;
    }
  }


  # Cache trans per pep, pep per prot
  my $mult = ( scalar( @prot ) > 1 ) ? 'mult' : 'sing';

  my $fwd;
  my $decoy;
  for my $prot ( @prot ) {
    $prots{$prot} ||= { sing => {}, mult => {}, stripped => {} };
    $prots{$prot}->{$mult}->{$pepkey}++;
    $prots{$prot}->{stripped}->{$seq}++;
    if ( $prot =~ /DECOY/ ) {
      $decoy++;
    } elsif ( $opts{alt_decoy} && $prot =~ /$opts{alt_decoy}/ ) {
      $decoy++;
    } else {
      $fwd++;
    }
  }
  if ( $decoy ) { 
    if ( $fwd ) { 
      $decoy{mixed}++;
    } else {
      $decoy{decoy}++;
    }
  } else {
    $decoy{fwd}++;
  }

  $peps{$mult}->{$pepkey}++;
  if ( !$ion_s || ($ion_s !~ /y/i && $ion_s !~ /b/i ) ) {
    $peps{ion_s}->{$pepkey}++;
  }

  if ( !$ion_n || $ion_n < 3  ) {
    $peps{ion_n}->{$pepkey}++;
  }

# 0    PrecursorMz [ 778.4129855 ]
# 1    ProductMz [ 498.26707303 ]
# 2    Tr_recalibrated [ 57.3 ]
# 3    transition_name [ 6_AAAAAAAAAAAAAAAASAGGK_2 ]
# 4    CE [ -1 ]
# 5    LibraryIntensity [ 7324.6 ]
# 6    transition_group_id [ 1_AAAAAAAAAAAAAAAASAGGK_2 ]
# 7    decoy [ 0 ]
# 8    PeptideSequence [ AAAAAAAAAAAAAAAASAGGK ]
# 9    ProteinName [ 1/P0CG40 ]
# 10   Annotation [ b7/-0.009,b14^2/-0.009,m10:16/-0.009 ]
# 11   FullUniModPeptideName [ AAAAAAAAAAAAAAAASAGGK ]
# 12   PrecursorCharge [ 2 ]
# 13   GroupLabel [ light ]
# 14   UniprotID [ 1/P0CG40 ]
# 15   FragmentType [ b ]
# 16   FragmentCharge [ 1 ]
# 17   FragmentSeriesNumber [ 7 ]


  if ( $is_irt{$seq} ) {
    $rt{irt}->{$seq}++;
    $irt_cnt++;
  }
  $rt{mseqs}->{$mseq} ||= {};
  $rt{mseqs}->{$mseq}->{$pre_z} ||= $rt;
}

  my @imsort= sort{ $a <=> $b } @imarray ;
  my $im_max= sprintf( '%0.4f', (pop@imsort));
  my $im_min= sprintf( '%0.4f', (shift@imsort));

  if ( !defined $idx{IonMobility} ) {
     $im_min = 'N/A';
     $im_max = 'N/A';
  }

for my $ext ( keys( %extrema ) ) {
  $extrema{$ext} = sprintf( "%0.1f", $extrema{$ext} );
}

my $tmax_sum;
my $tmax_cnt;
for my $key ( keys( %tmax ) ) {
  $tmax_cnt++;
  $tmax_sum += $tmax{$key}->{max_idx};
}
my $tmax_avg = sprintf( "%0.1f", $tmax_sum/$tmax_cnt );

# Finish with last accumulated pepion
close ILIB;

if ( $opts{print_mass_error} ) {
  close ME;
}

# RT data calculation
my $n_irt = scalar( keys( %{$rt{irt}} ) );

my @rt_all;
my @rt_two;
my @rt_three;
my %pepions;
for my $mseq ( keys( %{$rt{mseqs}} ) ) {
  for my $ch ( keys( %{$rt{mseqs}->{$mseq}} ) ) {
    $pepions{$mseq . '_' . $ch}++;
    push @rt_all, $rt{mseqs}->{$mseq}->{$ch};
  }
  next unless $rt{mseqs}->{$mseq}->{2} && $rt{mseqs}->{$mseq}->{3};
  push @rt_two, $rt{mseqs}->{$mseq}->{2};
  push @rt_three, $rt{mseqs}->{$mseq}->{3};
}

@rt_all = sort { $a <=> $b } ( @rt_all );
my $rt_min = $rt_all[0];
my $rt_max = $rt_all[$#rt_all];
my $rt_med = $rt_all[int($#rt_all/2)];

my $rsq = 'na';
my $rt_five = 'na';
if ( scalar( @rt_two ) < 8 ) {
  print STDERR "Too few points: ". scalar( @rt_two ) . " to run regression \n";
} else {
  if ( $opts{rt_stats} ) {
    my $rt = $opts{output_file} . ".RT";
    if ( -e $rt ) { 
      print STDERR "RT stats file $rt exists, cannot overwrite\n";
      exit;
    }
    print STDERR "Printing +2/+3 pairs to $rt\n";
    open RT, ">$rt";
  }

  my $reg;
  if ( $has_regression ) {
    $reg = Statistics::Regression->new( 'Y', [ 'Z', 'X' ] );
  }
  my $imperfect = 0;
  my $ok = 0;
  my $ok_delta = 5;
  for ( my $i = 0; $i <= $#rt_two; $i++ ) {
    print RT join( "\t", $rt_two[$i], $rt_three[$i] ) . "\n" if $opts{rt_stats};

    if ( $has_regression ) {
      $reg->include( $rt_two[$i], [ 1.0, $rt_three[$i] ] );
    }
    $imperfect++ unless $rt_two[$i] == $rt_three[$i];
    $ok++ if abs( $rt_two[$i] - $rt_three[$i] ) <= $ok_delta;
  }
  close RT if $opts{rt_stats};
  $rsq = 1.000;
  if ( !$has_regression ) {
    $rsq = 'NoStats';
    print STDERR "Unable to run regression without Statistics::Regression\n" if $opts{verbose};
  } elsif ( $imperfect ) {
    print STDERR "Running regression with " . scalar( @rt_two ) . " Total points\n" if $opts{verbose};
    $rsq = sprintf( "%0.3f", $reg->rsq() );
  } else {
    print STDERR "Skipping regression with " . scalar( @rt_two ) . " identical values\n" if $opts{verbose};
  }
  if ( $rt_max < 600 ) {
    print STDERR "Assuming rt in minutes due to $rt_max\n";
  } else {
    print STDERR "Assuming rt in seconds\n";
    $ok_delta *= 60;
  }
  $rt_five = sprintf( "%0.1f", ($ok/scalar(@rt_two))*100 );
}

my $dtot;
for my $dtype ( qw( fwd decoy mixed ) ) {
  $dtot += $decoy{$dtype};
}
my $decoy_pct = ( $decoy{decoy} ) ? sprintf( "%0.1f", 100*($decoy{decoy}/$dtot)) : 0;
my $mixed_pct = ( $decoy{mixed} ) ? sprintf( "%0.1f", 100*($decoy{mixed}/$dtot)) : 0;
my $fwd_pct = ( $decoy{fwd} ) ? sprintf( "%0.1f", 100*($decoy{fwd}/$dtot)) : 0;


my $fcnt = 0;
for my $mz ( sort { $a <=> $b }( keys( %{$dta{frg}} ) ) ) {
  $peps{frg_i}->{cnt}++;
  $peps{frg_i}->{inten} += $dta{frg}->{$mz};
  last if $fcnt++ >= 5;
}

if ( $write_data ) {
  my $mw = sprintf( "%0.4f", $dta{pre}->{mz} );
  print MGF qq~BEGIN IONS
PEPMASS=$mw
CHARGE=$dta{pre}->{z}
TITLE=$pepkey
~;
  for my $mz ( sort { $a <=> $b }( keys( %{$dta{frg}} ) ) ) {
    print MGF join( "\t", $mz, $dta{frg}->{$mz} ) . "\n";
  }
  print MGF "END IONS\n";
  close MGF;
}


$peps{stripped_len} = {};
my %stripped;
for my $type ( qw( sing mult ) ) {
  for my $pep ( keys( %{$peps{$type}} ) ) {
    $stats{$type}->{pepcnt}++;
    my $tpep = $pep;
    $tpep =~ s/\[[^]]+\]//g;
    $tpep =~ s/\([^)]+\)//g;
    $tpep =~ s/\d//g;
    $tpep =~ s/_//g;
    $tpep =~ s/-//g;
    if ( $tpep !~ /^\w+$/ ) {
      die "Issue with $pep: stripped value is $tpep\n";
    }
#    $stats{ 'str_' . $type };
    $stripped{$tpep}++;
    my $tlen = length( $tpep );
    $peps{stripped_len}->{$tlen}++;
    $stats{$type}->{len} += $tlen;
    $stats{$type}->{frags} += $peps{$type}->{$pep};
  }
}
my $mcstats = $opts{output_file} . ".mcstats.txt";
open MCSTATS, ">$mcstats";
print MCSTATS "trimmedpeptide\tlength\tMCcount\n";
# Which ppeps were seeen?  
my %seen = ( prots => {}, mprots => {}, peps => {}, mpeps => {} );
for my $sp ( keys( %stripped ) ) {
  if ( $ppeps{peps}->{$sp} ) {
    $seen{peps}->{$sp}++;
  } else {
    $seen{mpeps}->{$sp}++;
  }
  my $splen = length($sp);
  if ( $sp =~ /[KR](?!P)/ ) {
	$sp=~ s/^.//; #remove firstchar
	chop($sp); #remove last char As terminal K and R are not missed cleavages
	$sp =~ s/[RK](?!P)/z/g;
	my $spcount= ($sp =~ tr/z/z/);
	if ($spcount > 0){
 		$stats{mc}++;
		print MCSTATS "$sp\t$splen\t$spcount\n";
	  }
  }
}

#for my $p ( sort( keys( %prots ) ) ) {
#  my $pcnt = scalar( keys( %{$prots{$p}->{stripped}} ) );
#  print join( "\t", $p, $pcnt ) .   "\n";
#}
#exit;

# How many prots were seen
my %pepsperprot;
my @protcnts;
my $prottot;
my %protstats;
for my $prot ( keys( %prots ) ) {
  $pepsperprot{$prot} = scalar( keys( %{$prots{$prot}->{mult}} ) );
  $pepsperprot{$prot} += scalar( keys( %{$prots{$prot}->{sing}} ) );
  if ( $pepsperprot{$prot} < 0 ) {
    print STDERR "$prot\n";
#    print STDERR Dumper( $prots{$prot} );
#    exit;
  }
  $prottot += $pepsperprot{$prot};
  push @protcnts, $pepsperprot{$prot};
  $protstats{$pepsperprot{$prot}}++;
  if ( $ppeps{prots}->{$prot} ) {
    $seen{prots}->{$prot}++;
  } elsif ( $prot =~ /RT-Cal/ ) {
    $seen{prots}->{$prot}++;
  } else {
    $seen{mprots}->{$prot}++;
  }
}


my @sorted = sort( { $a <=> $b } @protcnts );
my $half = int( $#protcnts/2 + 1 );
my $med = $sorted[$half];
my $mean = sprintf( "%0.1f", $prottot/(1+$#protcnts) );
my $devs;
for ( my $i = 0; $i <= $#protcnts; $i++ ) {
  $devs += ( $protcnts[$i] - $mean )**2;
}
my $stddev = sprintf( "%0.1f", ($devs/(1+$#protcnts))**0.5 );

my $overprots = 0;
for my $cnt ( sort { $b <=> $a } keys( %protstats ) ) {
  last if $cnt < ( $mean + ( 3 * $stddev ) );
  $overprots += $protstats{$cnt};
}


$stats{totpeps} = scalar( keys( %stripped ) );
$stats{sprots} = scalar( keys( %{$seen{prots}} ) );
$stats{mprots} = scalar( keys( %{$seen{mprots}} ) );
if ( $stats{libprots} < 1 ) {
  $stats{sprots} = 'N/A';
  $stats{mprots} = 'N/A';
}

if ( $opts{print_ux} ) {
  my $ux = $opts{output_file} . ".UX_prots";
  if ( -e $ux ) { 
    print STDERR "Unexplained proteins file $ux exists, cannot overwrite\n";
    exit;
  }
  print STDERR "Printing 'unexplained' proteins to $ux\n";
  open UX, ">$ux";
  for my $p ( sort( keys( %{$seen{mprots}} ) ) ) {
    print UX "$p\n";
  }
  close UX;
}


#$stats{allprots} = $stats{sprots} + $stats{mprots};
$stats{allprots} = scalar( keys( %prots ) );

#print join( "\n", sort( keys( %{$seen{mprots}} ) ) ) . "\n";
$stats{speps} = scalar( keys( %{$seen{peps}} ) );
$stats{mpeps} = scalar( keys( %{$seen{mpeps}} ) );


#  print join( "\t", qw( Library Pepions Ptyp Mult Ptyp_perc Mult_perc alt_ser low_nr alen afrg afrglen Tot_peps MC_peps Lib_peps Seen_peps Lib_prots Seen_prots UX_prots ) ) . "\n";
# print join( "\t", qw( Library Pepions Ptyp Mult alt_ser low_nr alen afrg afrglen Ptyp_perc Mult_perc alt_ser_perc low_nr_perc ) ) . "\n";

  $stats{sing}->{pepcnt} ||= 0;
  $stats{mult}->{pepcnt} ||= 0;
  $stats{sing}->{len} ||= 0;
  $stats{mult}->{len} ||= 0;
  my $pepcnt = $stats{sing}->{pepcnt} + $stats{mult}->{pepcnt};
  for my $type ( qw( sing mult ) ) {
    $stats{$type}->{perc} = sprintf( "%0.2f", 100 * $stats{$type}->{pepcnt} / $pepcnt );
  }
  $stats{mult}->{frags} ||= 0;
  $stats{sing}->{frags} ||= 0;
  my $tot_len = $stats{sing}->{len} + $stats{mult}->{len};
  my $tot_frags = $stats{sing}->{frags} + $stats{mult}->{frags};

  my $alen = sprintf( "%0.1f", $tot_len/$pepcnt );
  my $afrg = sprintf( "%0.2f", $tot_frags/$pepcnt );
  my $afrglen = sprintf( "%0.2f", $afrg/$alen );


  # depricate s_cnt (alt series), instead do y and b seriest cnt/perc
#  my $s_cnt = scalar(keys(%{$peps{ion_s}} ) ); 
#  my $s_perc = ( $s_cnt ) ? '0.00' : sprintf( "%0.2f", 100*($s_cnt/$pepcnt));
  my $y_cnt = $peps{frg_s}->{y} || 0; 
  my $b_cnt = $peps{frg_s}->{b} || 0; 
  my $y_perc = ( $tot_frags ) ? sprintf( "%0.1f", ($y_cnt/$tot_frags)*100 ) : 0.0;
  my $b_perc = ( $tot_frags ) ? sprintf( "%0.1f", ($b_cnt/$tot_frags)*100 ) : 0.0;

  my $top_y = $peps{t6_frg_s}->{y} || 0;
  my $top_b = $peps{t6_frg_s}->{b} || 0;
  my $T6_y_perc = ( $top_y || $top_b ) ? sprintf( "%0.1f", (100*$top_y/($top_y+$top_b)) ) : 0.0;

  my $ainten = ( $peps{frg_i}->{cnt} ) ? sprintf( "%0.1f", $peps{frg_i}->{inten}/$peps{frg_i}->{cnt} ) : '0.0';

  my $tot_pre = 0;
  for my $z ( keys( %{$peps{pre_z}} ) ) {
    next if $z eq 'peps';
    $tot_pre += $peps{pre_z}->{$z};
  }
  my $pre_2 = ( $tot_pre && $peps{pre_z}->{2} ) ? sprintf( "%0.1f", 100*($peps{pre_z}->{2}/$tot_pre) ) : '0.0';
  my $pre_3 = ( $tot_pre && $peps{pre_z}->{3} ) ? sprintf( "%0.1f", 100*($peps{pre_z}->{3}/$tot_pre) ) : '0.0';

  # The number of missing or < 3 frg numbers
  my $n_cnt = scalar(keys(%{$peps{ion_n}})); 

  my $short_perc = ( $stats{frg_cnt_short} ) ? sprintf( "%0.1f", ($stats{frg_cnt_short}/($stats{frg_cnt_short}+$stats{frg_cnt_ok})*100 )) : 0; 

  my %mods;
  my %mcnt;
  my %mseen;


  for my $mpep ( keys( %{$peps{sing}}), keys(%{$peps{mult}} ) ) {

    # Only count one charge state
    $mpep =~ s/_\d+$//;
    next if $mseen{$mpep}++;
    $mcnt{all}++;
    my $regex = '(\w?\[[^\]]+\])'; 
    if ( $mpep =~ /UniMod/ ) {
      $regex = '(\w?\(UniMod:\d+\))';
    } else {
      next unless $mpep =~ /\[/;
    }
    my $has_mods = 0;
    for my $m ( $mpep =~ /$regex/ig ) {
      $mods{$m}++;
      $has_mods++;
    }
    $mcnt{mod}++ if ( $has_mods );
  }
  my $mcnt = 0;
  $peps{mods} = { none => $mcnt{all} - $mcnt{mod}, any => $mcnt{mod} };
  for my $mod ( sort( keys( %mods ) ) ) {
    $mcnt += $mods{$mod};
    $peps{mods}->{$mod} = $mods{$mod};
  }
  my $mpct = sprintf( "%0.1f", (($mcnt{mod}/$mcnt{all})*100) );
  print STDERR "Saw $mcnt{mod} peptides with modifications, $mcnt total mods in those peptides, and $mcnt{all} distinct modified peptides (includes non-mod)\n";

  my $tot_fragment = $stats{fragment};
  my $fragment_above_pct = ( $stats{fragment_above_precursor} ) ? sprintf( "%0.3f", $stats{fragment_above_precursor}/$stats{fragment}) : 0;

  my @checks;
  if ( $opts{assess_massdiff} ) {
    @checks = ( qw( precursor_ok precursor_bad fragment_ok fragment_bad fragment_na fragment_avg_mdiff ) );
  }


  my $problem_assays = scalar( keys( %problem_assays ) );
  my $n_perc = ( $n_cnt ) ? '0.00' : sprintf( "%0.2f", 100*($n_cnt/$pepcnt));
  my $pepion_cnt = scalar(keys(%pepions)) || 0;
  my @headings = ( qw( library
                        format
                        pepions
                        fragments
                        ptp_percent
                        shared_percent
                        shared_pepions
                        peptides
                        mod_peps
                        mod_percent
                        total_mods
                        chg_2
                        chg_3
                        precursor_min
                        precursor_max
                        fragment_min
                        fragment_max
                        avg_len
                        avg_num_frags
                        avg_frag_len
                        short_perc
                        fragment_above_precursor
                        y_perc
                        b_perc
                        t6_y_perc
                        avg_intensity
                        rt_min
                        rt_max
                        rt_med
                        rt_rsq
                        rt_five
                        n_irt
                        irt_cnt
                        low_nr
                        db_peps
                        seen_peps
                        mc_peps
                        db_prots
                        lib_prots
                        seen_prots
                        ux_prots
                        decoy_pct
                        mixed_pct
                        fwd_pct
                        med_ppp
                        mean_ppp
                        stddev_ppp
                        3_sigma_ppp
                        max_intensity_idx
                        ), @checks, @swaths, 'problem_assays','im_min', 'im_max');


  my @checkvals = ();
  if ( $opts{assess_massdiff} ) {
    $stats{fragment_avg_mdiff} = ( $stats{delta_cnt} ) ? sprintf( "%0.4f", $stats{delta_sum}/$stats{delta_cnt}) : 0;
    for my $k ( @checks ) {
      push @checkvals, $stats{$k} || 0;
    }
  }

  my @swathvals = ();
  if ( $opts{swath_file} ) {
    for my $skey ( @swaths ) {
      if ( $skey eq 'swa_conflict_assay' ) {
        push @swathvals, scalar(keys(%{$stats{$skey}} ) );
      } else {
        push @swathvals, $stats{$skey};
      }
    }
  }

  $rt_min = sprintf( "%0.1f", $rt_min );
  $rt_max = sprintf( "%0.1f", $rt_max );
  $rt_med = sprintf( "%0.1f", $rt_med );

  my %type2sw = ( os => 'OpenSWATH', 
                  pv => 'Peakview', 
                  sn => 'Spectronaut',
                  pf => 'Prosit/Fusion' );

  my @out_values = ( 
              $libname,
              $type2sw{$type},
              $pepion_cnt,
              $tot_fragment,
              $stats{sing}->{perc},
              $stats{mult}->{perc},
              $stats{mult}->{pepcnt},
              $stats{totpeps},
              $mcnt{all},
              $mpct,
              $mcnt,
              $pre_2,
              $pre_3,
              $extrema{precursor_min},
              $extrema{precursor_max},
              $extrema{fragment_min},
              $extrema{fragment_max},
              $alen,
              $afrg,
              $afrglen,
              $short_perc,
              $fragment_above_pct,
              $y_perc,
              $b_perc,
              $T6_y_perc,
              $ainten,
              $rt_min,
              $rt_max,
              $rt_med,
              $rsq,
              $rt_five,
              $n_irt,
              $irt_cnt,
              $n_cnt,
              $stats{libpeps},
              $stats{speps},
              $stats{mc},
              $stats{libprots},
              $stats{allprots},
              $stats{sprots},
              $stats{mprots},
              $decoy_pct,
              $mixed_pct,
              $fwd_pct,
              $med,
              $mean,
              $stddev,
              $overprots,
# die "over is $overprots med is $med, mean is $mean, and stddev is $stddev\n";
              $tmax_avg,
              @checkvals,
              @swathvals,
              $problem_assays,
	      $im_min,
	      $im_max
             );

  my $defs = get_coldefs();
  my @defs;
  for my $head ( @headings ) {
    push @defs, $defs->{$head} || '';
  }

#  if ( !$opts{redirect} && !($opts{filter_assays} || $opts{correct_mz} ) ) {
  if ( !$opts{redirect} ) {
    open OUT, ">$opts{output_file}.QC.tsv";
    print STDERR "Opening $opts{output_file}.QC.tsv for writing!\n";
    if ( $opts{invert_output} ) {
      for ( my $i = 0; $i <= $#headings; $i++ ) {
        print OUT join( "\t", $headings[$i], $out_values[$i] );
        if ( $opts{coldefs} ) {
          print OUT "\t$defs[$i]";
        }
        print OUT "\n";
      }
    } else {
      print OUT join( "\t", @headings ) . "\n";                     
      print OUT join( "\t", @out_values ) . "\n";                     
      if ( $opts{coldefs} ) {
        print OUT join( "\t", @defs ) . "\n";
      }
    }
    close OUT;
#  } elsif ( !($opts{filter_assays} || $opts{correct_mz} ) ) {
  } else {
    if ( $opts{invert_output} ) {
      for ( my $i = 0; $i <= $#headings; $i++ ) {
        print join( "\t", $headings[$i], $out_values[$i] );
        if ( $opts{coldefs} ) {
          print "\t$defs[$i]";
        }
        print "\n";
      }
    } else {
      print join( "\t", @headings ) . "\n";                     
      print join( "\t", @out_values ) . "\n";                     
      if ( $opts{coldefs} ) {
        print join( "\t", @defs ) . "\n";
      }
    }
  }

  if ( !$problem_assays ) {
    print STDERR "No problem assays found!\n";
  } else { 
    print STDERR "$problem_assays problem assays found!\n";
  }

  # User has requested a clean library
  if ( $opts{filter_assays} && $problem_assays ) {

    my $clean = $opts{ion_library} . ".clean";
    my $problem = $opts{ion_library} . ".problem";
    if ( -e $problem ) { 
      print STDERR "problem library $problem exists, cannot overwrite\n";
    } elsif ( -e $clean ) { 
      print STDERR "clean library $clean exists, cannot overwrite\n";
    } else {
      open ILIB, $opts{ion_library}|| die;

      print STDERR "Printing problem assays to $problem\n";
      print STDERR "Printing good assays to $clean\n";
      open CLEANLIB, ">$clean" || die;
      open PROBLEMLIB, ">$problem" || die;
      my $head = 1;
      while ( my $line = <ILIB> ) {
        if ( $head ) {
          print CLEANLIB $line;
          print PROBLEMLIB $line;
          $head = 0;
          next;
        }
        chomp $line;
#        my @line = split( /\t/, $line, -1 );
        my @line = parse_line( $line );

        my $pre_z = $line[$idx{pre_z}];
        my $mseq = $line[$idx{mseq}];

        my $pepkey = $mseq . '_' . $pre_z;

        if ( $problem_assays{$pepkey} ) {
          print PROBLEMLIB "$line\n";
        } else {
          print CLEANLIB "$line\n";
        }
      }
      close ILIB;
      close CLEANLIB;
      close PROBLEMLIB;
    }
  }

  # User has requested an mz-corrected library
  if ( $opts{correct_mz} ) {

    if ( $stats{fragment_na} ) {
      print STDERR "Some ($stats{fragment_na}) unknown fragment types were found, unable to do mz correction\n";
      exit;
    }
    unless ( $stats{fragment_bad} || $stats{precursor_bad} ) {
      print STDERR "No m/z discrepancy with either precursor or fragment ions was found, cntl-d to stop run (10 seconds)"; 
      sleep 10;
    }

    my $mz_corr = $opts{ion_library} . ".mz_corrected";
    if ( -e $mz_corr ) { 
      print STDERR "m/z corrected library $mz_corr exists, cannot overwrite\n";
    } else {
      open ILIB, $opts{ion_library}|| die;

      print STDERR "Printing mz-adjusted assays to $mz_corr\n";
      open MZLIB, ">$mz_corr" || die;
      my $head = 1;
      my %mz_stats;
      while ( my $line = <ILIB> ) {
        if ( $head ) {
          print MZLIB $line;
          $head = 0;
          next;
        }
        chomp $line;
#        my @line = split( /\t/, $line, -1 );
        my @line = parse_line( $line );

        # Pull out needed info
        my $precursor = sprintf( "%0.4f", $line[$idx{precursor}] );
        my $fragment = sprintf( "%0.4f", $line[$idx{fragment}] );
        my $pre_z = $line[$idx{pre_z}];
        my $frg_z = $line[$idx{frg_z}];
        my $ion_s = $line[$idx{ion_s}];
	# Ion series
	##++++ 
	#OpenSwath library has y-17, y-18, y-64 neutral losses for HN3, Water and oxidized methionine
	# so get rid of the -17, -18, -64 and consider these neutral losses in calculations.

	my $LTOS;
	if ($ion_s =~ /(-17)/)  {
		$LTOS = '-17';
	} elsif ($ion_s =~ /(-18)/)  {
		$LTOS = '-18';
	} elsif ($ion_s =~ /(-64)/)  {
		$LTOS = '-64';
	}else {
		$LTOS = '';
		}
	$ion_s =~ s/[-17 -18 -64]//g; #get rid of the -17, -18, -64 

	my $ion_n = $line[$idx{ion_n}];
        my $mseq = $line[$idx{mseq}];
        $mseq =~ s/^_//g;
        $mseq =~ s/_$//g;
        my $ion_key = $ion_s . $ion_n . '_' . $frg_z;
        my $lossType = ( defined $idx{lossType} ) ? $line[$idx{lossType}] : '';

        my $pepkey = $mseq . '_' . $pre_z;
        my $eseq = encode_modifications( $mseq );
        my $pmz = get_peptide_mass( $eseq, $pre_z );
        my $imz = $massmap{$pepkey}->{$precursor}->{peaks}->{$ion_key};

        # Adjust for neutral losses (if any)
        $imz -= $losses{$lossType}/$frg_z if ( $lossType );
	$imz -= $losses{$LTOS}/$frg_z if ( $LTOS );
#        print STDERR qq~ PRE: $precursor PRZ: $pre_z FRG: $fragment FRZ: $frg_z MSQ: $mseq PKY: $pepkey INS: $ion_s INN: $ion_n INK: $ion_key pMZ: $pmz fMZ: $imz ~;          

        if ( !$pmz ) {
          die "Unable to calculate precursor m/z from $line\n";
        } elsif ( !$imz ) {
          die "Unable to calculate fragment ion m/z from $line\n";
        }
        if ( $pmz != $precursor ) {
          $mz_stats{prec_changed}++;
        } else {
          $mz_stats{prec_ok}++;
        }
        if ( $imz != $fragment ) {
          $mz_stats{frag_changed}++;
        } else {
          $mz_stats{frag_ok}++;
        }

        # Calc and assign fragment m/z
        $line[$idx{precursor}] = $pmz;
        $line[$idx{fragment}] = $imz;

        if ( $is_CSV ) {
          print MZLIB $quote_char . join( $quote_char . ',' . $quote_char, @line ) . $quote_char . "\n";
        } else {
          print MZLIB join( "\t", @line ) . "\n";
        }
      }
      close ILIB;
      close MZLIB;
      for my $s ( sort( keys( %mz_stats ) ) ) {
        print STDERR "$s\t$mz_stats{$s}\n";
      }
    }
  }

  my $t1 = time();
  my $runtime = $t1 - $t0;
  print STDERR "Finished run in $runtime seconds\n";
  exit unless $opts{full_stats};

  my $fullstats = $opts{output_file} . ".fullstats";
  if ( -e $fullstats ) { 
    print STDERR "full_stats file $fullstats exists, cannot overwrite\n";
    exit;
  }
  print STDERR "Printing full stats to $fullstats\n";

  open FULL, ">$fullstats";
  for my $key ( qw( frg_s mods ) ) {
    my $line = "$key:";    
    my $sep = '';
    for my $val ( sort ( keys( %{$peps{$key}} ) ) ) {
      my $item = "$sep$val=$peps{$key}->{$val}";
      $item =~ s/:/_/g;
      $line .= $item;
      $sep = ',';
    }
    print FULL "$line\n";
  }

  
  for my $key ( qw( pre_z frg_z frg_n stripped_len ) ) {
    my $line = "$key:";    
    my $sep = '';
    my @keys = sort { $a <=> $b } keys( %{$peps{$key}} );
    my $min = $keys[0];
    my $max = $keys[$#keys];
    for ( my $idx = $min; $idx <= $max; $idx++ ) {
      if ( $opts{zero_pad} ) {
        $peps{$key}->{$idx} ||= 0;
      } else {
        next unless defined $peps{$key}->{$idx};
      }
      $line .= "$sep$idx=$peps{$key}->{$idx}";
      $sep = ',';
    }
    print FULL "$line\n";
  } 
  my $pepsing = $peps{sing}; 
  my $pepmult = $peps{mult};
  my %pepchg;
 my @pep = (keys %{$pepsing}, keys %{$pepmult}); #works with all perl versions 
 foreach $_(@pep){ #works with all perl versions
    my ( $mseq, $chg ) = split( /_/, $_ ); #works with all perl versions
    $pepchg{$chg}++;
  }
  my $sep = '';
  my @keys = sort { $a <=> $b } keys( %pepchg );
  my $min = $keys[0];
  my $max = $keys[$#keys];
  my $line = 'Distinct_pre_z:';
  for ( my $idx = $min; $idx <= $max; $idx++ ) {
    if ( $opts{zero_pad} ) {
      $pepchg{$idx} ||= 0;
    } else {
      next unless defined $pepchg{$idx};
    }
    $line .= "$sep$idx=$pepchg{$idx}";
    $sep = ',';
  }
  print FULL "$line\n";

# This section causes the numbers > 5 to be summed into the 5 bin.
#  $protstats{$pepsperprot{$prot}}++;
  my %top5;
  for my $p ( keys( %protstats ) ) {
    if ( $p >= 5 ) {
      $top5{5} += $protstats{$p};
    } else {
      $top5{$p} = $protstats{$p};
    }
  }
#  %protstats = %top5;

 
  my $sep = '';
  my @keys = sort { $a <=> $b } keys( %protstats );
  my $min = $keys[0];
  my $max = $keys[$#keys];
  my $line = 'PepIonsPerProtein:';
  for ( my $idx = $min; $idx <= $max; $idx++ ) {
    if ( $opts{zero_pad} ) {
      $protstats{$idx} ||= 0;
    } else {
      next unless defined $protstats{$idx};
    }
    $line .= "$sep$idx=$protstats{$idx}";
    $sep = ',';
  }
  print FULL "$line\n";

  my %real_ppp;
  for my $prot ( keys( %prots ) ) {
    my $scnt = scalar( keys( %{$prots{$prot}->{stripped}} ) );
    # This statement causes the numbers > 5 to be summed into the 5 bin.
    # $scnt = 5 if $scnt > 5;
    $real_ppp{$scnt}++;
  }
  my $sep = '';
  my @keys = sort { $a <=> $b } keys( %real_ppp );
  my $min = $keys[0];
  my $max = $keys[$#keys];
  my $line = 'PepsPerProtein:';
  for ( my $idx = $min; $idx <= $max; $idx++ ) {
    if ( $opts{zero_pad} ) {
      $real_ppp{$idx} ||= 0;
    } else {
      next unless defined $real_ppp{$idx};
    }
    $line .= "$sep$idx=$real_ppp{$idx}";
    $sep = ',';
  }
  print FULL "$line\n";
      
  
#  $precursorstats{$pepsperprot{$prot}}++;
  my $sep = '';
  my %pre_bin;
  for my $pr ( keys( %precursorstats ) ) {
    for ( my $bin = 50; $bin <= 4000; $bin += 50 ) {
      if ( $bin > $pr ) {
        $pre_bin{$bin}++;
        last;
      }
    }
  }
  my $line = 'PrecusorCounts:';
  for my $pre ( sort { $a <=> $b } keys(%pre_bin)  ) {
    $line .= "$sep$pre=$pre_bin{$pre}";
    $sep = ',';
  }
  print FULL "$line\n";
  
#  $ion_cnt{pepkey}++; 
  my $sep = '';
  my %cnt_bins;
  for my $mpep ( keys( %ion_cnt ) ) {
    $cnt_bins{$ion_cnt{$mpep}}++;
  }
  my @keys = sort { $a <=> $b } keys( %cnt_bins );
  my $min = $keys[0];
  my $max = $keys[$#keys];
  my $line = 'FragmentsPerPrecursor:';
  for ( my $idx = $min; $idx <= $max; $idx++ ) {
    if ( $opts{zero_pad} ) {
      $cnt_bins{$idx} ||= 0;
    } else {
      next unless defined $cnt_bins{$idx};
    }
    $line .= "$sep$idx=$cnt_bins{$idx}";
    $sep = ',';
  }
  print FULL "$line\n";

  close FULL;
# End MAIN program #

#+ 
# Simple CSV parser
#-
sub parse_csv {
  return quotewords( ",", 0, $_[0] );
}

#+ 
# Line parser
#-
sub parse_line {
  my $line = shift;
  if ( $is_CSV ) {
    return parse_csv( $line );
  } else {
    return split( "\t", $line, -1 );
  }
}

#+
# Routine to get index of specific cmap fields 
#-
sub find_index {
  my $names = shift;
  my $target = shift;
  my $fail_ok = shift || 0;
  my $idx;
  for my $name ( @{$names} ) {
    if ( defined( $cmap{$name} ) ) {
      return $cmap{$name};
    }
  }
  return if $fail_ok;
  my $opts = join( ',', @{$names} );
  print STDERR "Unable to find an index for $target in $opts; printing debug info\n\n";
  $opts{debug}++;
}

#+
# routine to read and validate options
#-
sub read_options {

  # Local hash of options
  my %opts;

  GetOptions(\%opts,"ion_library:s",
                    "alt_decoy:s",
                    "peptide_file:s",
                    "write",
                    "output_file:s",
                    "filter_assays",
                    "correct_mz",
                    "full_stats",
                    "rt_stats",
                    "verbose", 
                    'assess_massdiff', 
                    'print_mass_error', 
                    'skip_decoys', 
                    'swath_file:s', 
                    'coldefs',
                    'debug',
                    'print_ux', 
                    'invert_output', 
                    'neutral_loss', 
                    'zero_pad',
                    'help' ) || die "$'";

  $opts{mono} = { G => 57.021464,
                  D => 115.02694,
                  A => 71.037114,
                  Q => 128.05858,
                  S => 87.032029,
                  K => 128.09496,
                  P => 97.052764,
                  E => 129.04259,
                  V => 99.068414,
                  M => 131.04048,
                  T => 101.04768,
                  H => 137.05891,
                  C => 103.00919,
		  U => 150.95363,#U = selenocysteine
                  F => 147.06841,
                  L => 113.08406,
                  R => 156.10111,
                  I => 113.08406,
                  N => 114.04293,
                  Y => 163.06333,
                  W => 186.07931,
		  X => 0
		  };
   
  my $err = '';

  # Check for required params
  for my $opt ( qw( ion_library  ) ) {
    $err .= "Missing required option $opt\n" unless $opts{$opt};
  }

  if ( $opts{help} ) {
    print_usage( "" );
  } elsif ( $err ) {
    print_usage( $err );
  }

  # Make sure ion library file exists
  if ( ! -e $opts{ion_library} ) {
    print_usage( "File $opts{ion_library} does not exist\n" );
  }

  # Determine filename base
  if ( !$opts{output_file} ) {
    if ( -t STDOUT ) {
      my $qc = "$opts{ion_library}.QC.tsv";
      if ( -e $qc ) {
        print STDERR "QC file $qc exists, cannot overwrite - printing to STDOUT\n";
      } else {
         $opts{output_file} = $qc;
      }
    } else {
      $opts{redirect}++;
    }
    $opts{output_file} = $opts{ion_library};
  }


  if ( $opts{filter_assays} ) {
    for my $o ( qw( correct_mz full_stats rt_stats print_ux ) ) {
      die "filter_assays is incompatible with correct_mz, full_stats, rt_stats, and print_ux" if $opts{$o};
    }
    if ( !$opts{assess_massdiff} ) {
      print STDERR "filter_assays option requires assess_massdiff, enabling\n";
      $opts{assess_massdiff}++;
    }
  } elsif ( $opts{correct_mz} ) {
    for my $o ( qw( full_stats rt_stats print_ux ) ) {
      die "correct_mz is incompatible with full_stats, rt_stats, and print_ux" if $opts{$o};
    }
    if ( !$opts{assess_massdiff} ) {
      print STDERR "correct_mz option requires assess_massdiff, enabling\n";
      $opts{assess_massdiff}++;
    }
  }
   
  return ( %opts );
}


#+
#  Routine to print usage statement if requested or error found
#-
sub print_usage {
  my $msg = shift || "";
  my $prog = basename ( $0 );

  print <<"  EOM";

  $msg
  Usage: $prog --ion_library libfile [ --peptide_file pepfile  --output output_file ]

  # Required
  ion_library    Path to ion library file in Peakview/OpenSWATH/Spectronaut/Prosit format (required)

  # Commonly used
  assess_massdiff   Compare library masses to theoretical monoisotopic values
  c, coldefs        print out column definitions
  f, full_stats     Print file (output_file.fullstats)
  h, help           Print this usage information and exit 
  invert_output     Invert output, 3 columns by X rows
  o, output_file    File (base) to which to write data; given MyOut, generates 
                    MyOut.QC, MyOut.fullstats, etc. If not specified, library name 
                    is used.
  peptide_file      Path to peptide digest file, format is protein(s)\tpeptide
  swath_file        Path to file of SWATH definitions, format min_m/z\tmax_m/z

  # Generate filtered/corrected library
  correct_mz        Will compute precursor and fragment masses for each entry
                    in library and print new library:
                    ion_library.mz_corrected
                    Requires use of --assess, precludes use of 
                    filter_assays, rt_stats, full_stats, print_ux 
  filter_assays     Assesses each assay, and removes any that have a 
                    problem (m/z error, discrepancy from SWATH file).
                    Creates 2 new libraries, one with all passing assays and 
                    another with all failed (problem) assays:
                    ion_library.clean 
                    ion_library.problem
                    Requires use of --assess, precludes use of correct_mz, 
                    rt_stats, full_stats, print_ux

  # Other
  alt_decoy         Alternate decoy prefix (default is DECOY)
  debug             Print parsed/library values in case of format issues
  print_mass_error  Print out ions whose m/z values differ significantly from 
                    theoretical
  print_ux          Print unexplained proteins (seen in ion library but not 
                    in peptide_file if provided
  rt_stats          Print file of +2/+3 RTs for analysis to output_file.RT,
                    only if enough for regression
  skip_decoys       Skip DECOY entries when computing statistics
  v, verbose        Verbose output mode
  write             Write  MGF file with info from each spectrum (rare)
  zero_pad          Print intermediate values=0 with full_stats.
  EOM

  exit;

}


#+
#  Routine to generate output column definitions
#-
sub get_coldefs {
  my %defs = (
library => 'Name of library file being analyzed',
format => 'Library format (OpenSWATH, Peakview, or Spectronaut) ',
pepions => 'Number of peptide ions (i.e., precursor, sequence + modifications + charge)',
fragments => 'Number of fragments (fragment ions) in library',
ptp_percent => 'Percentage of proteotypic peptide ions (not shared)',
shared_percent => 'Percentage of shared peptide ions',
shared_pepions => 'Number of shared peptide ions',
peptides => 'Number of distinct peptide sequences',
mod_peps => 'Number of distinct modified peptides (sequences + modifications)',
mod_percent => 'Percentage of distinct modified peptides with a mass modification',
total_mods => 'Number of mass modified amino acids',
chg_2 => 'Percentage of charge 2 precursors',
chg_3 => 'Percentage of charge 3 precursors',
avg_len => 'Average peptide length',
precursor_min => 'Minimum precursor m/z (mass/charge) in library',
precursor_max => 'Maximum precursor m/z in library',
fragment_min => 'Minimum fragment m/z in library',
fragment_max => 'Maximum fragment m/z in library',
avg_num_frags => 'Average number of fragments per assay (precursor)',
avg_frag_len => 'Average fragment sequence length',
short_perc => 'Percentage of assays with 5 or fewer transitions',
y_perc => 'Percentage of y ions',
b_perc => 'Percentage of b ions',
t6_y_perc => 'Percentage of y  ions considering only the top 6 fragments per assay',
avg_intensity => 'Average intensity',
low_nr => 'Number of fragments annotated as y1,y2,b1, or b2',
fragment_above_precursor => 'Percentage of fragment m/z above precursor m/z',
max_intensity_idx => 'Average index of most intense fragment ',
med_ppp => 'Median number of peptide ions per protein ',
mean_ppp => 'Mean number of peptide ions per protein ',
stddev_ppp => 'Standard deviation of the number of peptide ions per protein ',
'3_sigma_ppp' => 'Number of peptide ions per protein more than 3 standard deviations from the mean ',
rt_min => 'Minimum retention time (RT) in library ',
rt_max => 'Maximum RT in library',
rt_med => 'Median RT in library',
rt_rsq => 'r-squared value of fit between RT of +2 and +3 charge states for the same modified peptide',
rt_five => 'Percentage of +2/+3 charge pairs of the same modified peptide within 5 RT units of each other',
n_irt => 'Number of iRT peptides in library',
irt_cnt => 'Number of iRT assays (precursor + fragment)',
db_peps => 'Peptides in reference library, often 7-50 AA',
seen_peps => 'Number of library peptides seen',
mc_peps => 'Number of missed cleavage peptides',
db_prots => 'Number of proteins in reference library',
lib_prots => 'Number of protein groups with at least one assay in ion library',
seen_prots => 'Number of library proteins seen',
ux_prots => 'Unexplained (not in reference library) proteins',
decoy_pct => 'Percentage of decoy (optionally includes "alternative", non-db decoys) assays',
mixed_pct => 'Percentage of mixed decoy/target assays (has both decoy and no-decoy annotations)',
fwd_pct => 'Percentage of target (non-decoy) assays',
precursor_ok => 'Number of assays where precursor is within 5 ppm (parts per million m/z) of theoretical',
precursor_bad => 'Number of assays where precursor is more than 5 ppm from theoretical',
fragment_ok => 'Number of assays where fragment is within 1 ppm of theoretical',
fragment_bad => 'Number of assays where fragment is more than 1 ppm from theoretical',
fragment_na => 'Number of assays where peak annotation is not found in expected b/y series',
fragment_avg_mdiff => 'Average m/z difference between reported and theoretical fragment',
swa_defined => 'Number of peptide ions that fall into a defined SWATH bin ',
swa_missing => 'Number of peptide ions that fail to fall into a defined SWATH bin ( Pepions - swa_defined)',
swa_conflict => 'Number of fragment_ions that fall into same SWATH(s) as precursor',
swa_ok => 'Number of fragment_ions that do not fall into same SWATH(s) as precursor',
swa_conflict_assay => 'Number of precursor that have at least one failing fragment',
swa_5 => 'Number of fragment ions that fall within 5 Th of precursor ion',
swa_25 => 'Number of fragment ions that fall within 25 Th of precursor ion',
problem_assays => 'Assays whose precursor or any fragment m/z values do not match SWATHs file or differ significantly from theoretical values',
im_min => 'Minimum ion mobility reported in library',
im_max => 'Maximum ion mobility reported in library'
      );
  return \%defs;
}

#+
#  Routine to calculate peptide mz
#-
sub get_peptide_mass {
	my $eseq = shift; # single-letter encoded sequence!

  	my $chg = shift || 2;
  	my $is_precursor = shift || 0;
	# print STDERR "Eseq is $eseq, charge is $chg\n" if $is_precursor;

	my $mz_key = $eseq . '_' . $chg;

	#return $mpep2mz{$mz_key} if $mpep2mz{$mz_key}; 
	my $mass = 0;
          
        if ($eseq =~ /[a-z]/) {
            	 for my $keys3 (keys %mods_current)
			{
	 		 $mass += $known_mods{$keys3}{'mz'} * $mods_current{$keys3};
  			}
	} else {
	 	  %mods_current = ();
 	    }

	$eseq =~ s/x//g;
	for my $aa ( split( '', uc($eseq) ) ){
		die "Unknown AA $aa from $eseq" unless $opts{mono}->{$aa};
		$mass += $opts{mono}->{$aa};
		}
	$mass += WATER_MASS;
	my $h_mass = 1.00727646688;
	my $pmz = sprintf( '%0.4f', ( $mass + $chg * H_MASS )/$chg );
	#  print STDERR "eseq is $eseq, ycnt is $ycnt, mass is $mass, pmz is $pmz\n" if $is_precursor;
	return $pmz;
}

#+
#  Routine to translate between modification namespaces
#-
sub encode_modifications {
  my $seq = shift;
  my $regex = '(\w?\[[^\]]+\]-*)'; 

  # Block for debugging mod regex
  if ( 0 ) {
    for my $mod ( keys( %known_mods ) ) {
      my $match = '0';
      if ( $mod =~ /$regex/ ) {
        $match = $1;
      }
      print "$mod\t$match\n";
    }
    exit;
  }


  if ( $seq =~ /UniMod/ ) {
    $regex = '(\w?\(UniMod:\d+\))';
  } 

  %mods_current=();
  for my $m ( $seq =~ /$regex/ig ) {
    $mods_current{$m}++;

    unless( $known_mods{$m}{'aa'} ) {
      print ">$m<";
      die "Unknown modification $m in $seq ($regex)\n";
    }
  }
  my $eseq = $seq;
  for my $m ( keys( %mods_current ) ) {
    my $em = $m;
    $em =~ s/\[/\\[/g;
    $em =~ s/\+/\\+/g;
    $em =~ s/\]/\\]/g;
    $em =~ s/\(/\\(/g;
    $em =~ s/\)/\\)/g;
    $eseq =~ s/$em/$known_mods{$m}{'aa'}/g;
  }
             
  if ( $eseq =~ /(.?\[[^\[\]]+\])/ ) {
    die "1. Unknown mod $1 in $eseq\n";
  } elsif ( $eseq =~ /\d/ ) {
    die "2. Unknown mod in $eseq\n";
  } elsif ( $eseq =~ /\[/ ) {
    die "3. Unknown mod in $eseq\n";
  } elsif ( $eseq =~ /\(/ ) {
    die "4. Unknown mod in $eseq\n";
  }
  return $eseq;
}

#+
#  Routine to generate fragment ions
#-
sub generate_ions {
  my $eseq = shift;
  
  my %b;
  my %y;
  my @aa;
  
  my $regex = '(\w?\[[^\]]+\]-*)'; 
  if ( $eseq =~ /UniMod/ ) {
    $regex = '(\w?\(UniMod:\d+\))';
  } 
  my %modindex;
  
  for my $m ( $eseq =~ /$regex/ig ) {
    my $index = index ($eseq, $m);
	my $em = $m;
	$em =~ s/\[/\\[/g;
	$em =~ s/\+/\\+/g;
	$em =~ s/\]/\\]/g;
	$em =~ s/\(/\\(/g;
	$em =~ s/\)/\\)/g;
	$em =~ s/-\[/\\[/g;
	$eseq =~ s/$em/$known_mods{$m}{'aa'}/;
	$modindex{$index} = $m;
	}
  
   @aa = split( //, $eseq );
	  for my $keymod (keys %modindex)
	{
		$aa[$keymod]= "$modindex{$keymod}";	
	} 
  
  my $bpep = '';
  my $ypep = '';

 #Nterminal modification
 $nterm_mod = $cterm_mod = '';
  if ($eseq =~ /^x/)
  {
	  $nterm_mod = shift @aa; 
  }
  if ($eseq =~ /x$/){
	  $cterm_mod = pop @aa;
  }
  
  my $last2nd = pop @aa;
  if ($last2nd =~ /-/)
   {  
		$cterm_mod = "-$cterm_mod";
   }
   else {
	   push @aa, $last2nd;
   }

 if ($nterm_mod && !$cterm_mod){
	 $bpep = $nterm_mod;
	 for ( my $idx = 0; $idx <= $#aa; $idx++ ) {
		 $bpep .= $aa[$idx];
		 $b{$idx + 1} = $bpep;
		 $ypep = $aa[$#aa - $idx] . $ypep;
		 if ($idx == ($#aa)){
			  $ypep = $nterm_mod.$ypep; 
		   }
		 $y{$idx + 1} = $ypep;
	 }
 } elsif ($cterm_mod && !$nterm_mod) {
	 $ypep = $cterm_mod || '';
	 for ( my $idx = 0; $idx <= $#aa; $idx++ ) {
		 $bpep .= $aa[$idx];
		 $b{$idx + 1} = $bpep;
		 $ypep = $aa[$#aa - $idx] . $ypep;
		 if ($idx == ($#aa)){
			 $ypep = $nterm_mod.$ypep;	
		 }
		 $y{$idx + 1} = $ypep;
		}
  }elsif ($nterm_mod && $cterm_mod){
	  $bpep = $nterm_mod || '';
	  $ypep = $cterm_mod || '';
	  for ( my $idx = 0; $idx <= $#aa; $idx++ ) {
			$bpep .= $aa[$idx];
			if ($idx == ($#aa)){
				$bpep = $bpep.$cterm_mod;
				}
			$b{$idx + 1} = $bpep;
			$ypep = $aa[$#aa - $idx] . $ypep;
			if ($idx == ($#aa)){
				$ypep = $nterm_mod.$ypep;	
			}
			$y{$idx + 1} = $ypep;
		}
	}else
	  {
		  for ( my $idx = 0; $idx <= $#aa; $idx++ ) {
			  $bpep .= $aa[$idx];
			  $b{$idx + 1} = $bpep;
			  $ypep = $aa[$#aa - $idx] . $ypep;
			  $y{$idx + 1} = $ypep;
		  }
	  }
  my %ions = ( b => \%b, y => \%y );
  return \%ions;
}

__DATA__
#
# SWATH File format info
#
# peakview 
# 0    Q1 [ 778.413 ]
# 1    Q3 [ 498.2579 ]
# 2    RT_detected [ 57.3 ]
# 3    protein_name [ 1/P0CG40 ]
# 4    isotype [ light ]
# 5    relative_intensity [ 7324.6 ]
# 6    stripped_sequence [ AAAAAAAAAAAAAAAASAGGK ]
# 7    modification_sequence [ AAAAAAAAAAAAAAAASAGGK ]
# 8    prec_z [ 2 ]
# 9    frg_type [ b ]
# 10   frg_z [ 1 ]
# 11   frg_nr [ 7 ]
# 12   iRT [ 57.3 ]
# 13   uniprot_id [ 1/P0CG40 ]
# 14   decoy [ FALSE ]
# 15   N [ 1 ]
# 16   confidence [ 1 ]
# 17   shared [ FALSE ]

# openswath
# 0    PrecursorMz [ 778.4129855 ]
# 1    ProductMz [ 498.26707303 ]
# 2    Tr_recalibrated [ 57.3 ]
# 3    transition_name [ 6_AAAAAAAAAAAAAAAASAGGK_2 ]
# 4    CE [ -1 ]
# 5    LibraryIntensity [ 7324.6 ]
# 6    transition_group_id [ 1_AAAAAAAAAAAAAAAASAGGK_2 ]
# 7    decoy [ 0 ]
# 8    PeptideSequence [ AAAAAAAAAAAAAAAASAGGK ]
# 9    ProteinName [ 1/P0CG40 ]
# 10   Annotation [ b7/-0.009,b14^2/-0.009,m10:16/-0.009 ]
# 11   FullUniModPeptideName [ AAAAAAAAAAAAAAAASAGGK ]
# 12   PrecursorCharge [ 2 ]
# 13   GroupLabel [ light ]
# 14   UniprotID [ 1/P0CG40 ]
# 15   FragmentType [ b ]
# 16   FragmentCharge [ 1 ]
# 17   FragmentSeriesNumber [ 7 ]
#
# Spectronaut
# 0     PrecursorMz [ 778.4129855 ]
# 1     PrecursorCharge [ 2 ]
# 2     ProteinName [ 1/P0CG40 ]
# 3     Organism [ Human ]
# 4     iRT Value [ 57.3 ]
# 5     PeptideSequence [ AAAAAAAAAAAAAAAASAGGK ]
# 6     PeptideModifiedSequence [ AAAAAAAAAAAAAAAASAGGK ]
# 7     FragmentType [ b ]
# 8     FragmentNumber [ 6 ]
# 9     Fragmentation [ b6 ]
# 10    Losses [  ]
# 11    LossNeutralMass [ 0 ]
# 12    ProductCharge [ 1 ]
# 13    ProductMz [ 427.22995924 ]
# 14    LibraryIntensity [ 8319.7 ]
# 15    ProteinId [ 1/P0CG40 ]
# 

# 0	ReferenceRun [ C_D160304_S251-Hela-2ug-2h_MSG_R01_T0 ]
# 1	PrecursorCharge [ 3 ]
# 2	IntModifiedPeptide [ _SLDTHVTK_ ]
# 3	ModifiedPeptide [ _SLDTHVTK_ ]
# 4	StrippedPeptide [ SLDTHVTK ]
# 5	iRT [ -35.19764 ]
# 6	BGSInferenceId [ Q14166 ]
# 7	IsProteotypic [ True ]
# 8	LabeledPeptide [ _SLDTHVTK_ ]
# 9	PrecursorMz [ 300.831025070542 ]
# 10	ReferenceRunQvalue [ 0.000844789494294673 ]
# 11	ReferenceRunMS1Response [ 9.9044E+07 ]
# 12	FragmentLossType [ noloss ]
# 13	FragmentNumber [ 3 ]
# 14	FragmentType [ y ]
# 15	FragmentCharge [ 1 ]
# 16	FragmentMz [ 347.228896545912 ]
# 17	RelativeIntensity [ 100 ]
# 18	ExcludeFromAssay [ False ]
# 19	Database [ sp ]
# 20	ProteinGroups [ Q14166 ]
# 21	UniProtIds [ Q14166 ]
# 22	Protein Name [ TTL12_HUMAN ]
# 23	ProteinDescription [ Tubulin--tyrosine ligase-like protein 12 ]
# 24	Organisms [ Homo sapiens ]
# 25	Genes [ TTLL12 ]
# 26	Protein Existence [ 1 ]
# 27	Sequence Version [ 2 ]
# 28	FASTAName [ uniprot_sprot_2014-12-11_HUMAN_ISOFORMS ]
#
# PROSIT
# 0     RelativeIntensity [ 0.047176161128631676 ]
# 1     FragmentMz [ 147.112804167 ]
# 2     ModifiedPeptide [ _AAAAAAALQAK_ ]
# 3     LabeledPeptide [ AAAAAAALQAK ]
# 4     StrippedPeptide [ AAAAAAALQAK ]
# 5     PrecursorCharge [ 2 ]
# 6     PrecursorMz [ 478.77981619566503 ]
# 7     iRT [ 9.95382308959961 ]
# 8     FragmentNumber [ 1 ]
# 9     FragmentType [ y ]
# 10    FragmentCharge [ 1 ]
# 11    FragmentLossType [ noloss ]
#
# CHROMAT_LIBRARY (Modified OSW)
# 0     transition_group_id [ HEELMLGDPC[57]LK+2 ]
# 1     transition_name [ HEELMLGDPC[57]LK+2_b3+2 ]
# 2     ProteinId [ sp|P07814|SYEP_HUMAN ]
# 3     PeptideSequence [ HEELMLGDPCLK ]
# 4     ModifiedPeptideSequence [ HEELMLGDPC[57]LK ]
# 5     FullPeptideName [ HEELMLGDPC[57]LK ]
# 6     RetentionTime [ 2220.0 ]
# 7     PrecursorMz [ 721.3443378086647 ]
# 8     PrecursorCharge [ 2 ]
# 9     ProductMz [ 396.151374562 ]
# 10    ProductCharge [ 1 ]
# 11    LibraryIntensity [ 1111.5 ]
# 12    FragmentIonType [ b ]
# 13    FragmentSeriesNumber [ 3 ]
# 14    IsDecoy [ 0 ]
# 15    quantifying_transition [ 1 ]
#
#EOF