package net.sf.picard.sam;

import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.picard.PicardException;
import net.sf.picard.cmdline.CommandLineProgram;
import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.StandardOptionDefinitions;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.io.IoUtil;
import net.sf.picard.metrics.MetricsFile;
import net.sf.picard.util.Histogram;
import net.sf.picard.util.Log;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.SortingCollection;
import net.sf.samtools.util.SortingLongCollection;
import org.biojava.bio.program.sax.BlastLikeVersionSupport;

/* loaded from: input_file:net/sf/picard/sam/MarkDuplicates.class */
public class MarkDuplicates extends CommandLineProgram {
    /**
     * Sequence-dictionary size threshold used when choosing which ReadEndsMap
     * implementation tracks reads whose mate has not been seen yet.
     */
    private static final int MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP = 500;

    @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input SAM or BAM file to analyze.  Must be coordinate sorted.")
    public File INPUT;

    @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output file to write marked records to")
    public File OUTPUT;

    @Option(shortName = StandardOptionDefinitions.METRICS_FILE_SHORT_NAME, doc = "File to write duplication metrics to")
    public File METRICS_FILE;

    /** Completed pairs (both ends seen), sorted for duplicate detection. */
    private SortingCollection<ReadEnds> pairSort;

    /** One entry per mapped read, sorted for fragment-level duplicate detection. */
    private SortingCollection<ReadEnds> fragSort;

    /** File indexes of the records that must be flagged as duplicates. */
    private SortingLongCollection duplicateIndexes;

    /** Compiled form of READ_NAME_REGEX; stays null when no regex is configured. */
    private Pattern READ_NAME_PATTERN;
    private final Log log = Log.getInstance(MarkDuplicates.class);

    @Usage
    public final String USAGE = "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged.";

    @Option(doc = "If true do not write duplicates to the output file instead of writing them with appropriate flags set.")
    public boolean REMOVE_DUPLICATES = false;

    @Option(doc = "If true, assume that the input file is coordinate sorted, even if the header says otherwise.", shortName = "AS")
    public boolean ASSUME_SORTED = false;

    @Option(doc = "Regular expression that can be used to parse read names in the incoming SAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. The regular expression should contain three capture groups for the three variables, in order.")
    public String READ_NAME_REGEX = "[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*";

    @Option(doc = "The maximum offset between two duplicate clusters in order to consider them optical duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) unless using later versions of the Illumina pipeline that multiply pixel values by 10, in which case 50-100 is more normal.")
    public int OPTICAL_DUPLICATE_PIXEL_DISTANCE = 100;

    /** Number of indexes added to {@link #duplicateIndexes} so far. */
    private int numDuplicateIndices = 0;

    /** Library name -> compact numeric id stored on each ReadEnds. */
    private final Map<String, Short> libraryIds = new HashMap<>();

    /** Next id to hand out for a library name that has not been seen before. */
    private short nextLibraryId = 1;

    /** Optical-duplicate counts, keyed by library id. */
    private final Histogram<Short> opticalDupesByLibraryId = new Histogram<>();

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:net/sf/picard/sam/MarkDuplicates$ReadEndsComparator.class */
    /**
     * Orders ReadEnds by library, first-end position and orientation, second-end
     * position, and finally by the file indexes of the two reads.
     *
     * <p>Each key is compared with {@code Integer.compare}/{@code Long.compare}
     * rather than by subtraction: narrowing a {@code long} file-index difference
     * to {@code int} can yield 0 or the wrong sign once indexes are more than
     * 2^31 apart, and int subtraction can overflow. Only the sign of the result
     * is meaningful.</p>
     */
    public static class ReadEndsComparator implements Comparator<ReadEnds> {
        ReadEndsComparator() {
        }

        @Override // java.util.Comparator
        public int compare(ReadEnds readEnds, ReadEnds readEnds2) {
            int result = Integer.compare(readEnds.libraryId, readEnds2.libraryId);
            if (result == 0) {
                result = Integer.compare(readEnds.read1Sequence, readEnds2.read1Sequence);
            }
            if (result == 0) {
                result = Integer.compare(readEnds.read1Coordinate, readEnds2.read1Coordinate);
            }
            if (result == 0) {
                result = Integer.compare(readEnds.orientation, readEnds2.orientation);
            }
            if (result == 0) {
                result = Integer.compare(readEnds.read2Sequence, readEnds2.read2Sequence);
            }
            if (result == 0) {
                result = Integer.compare(readEnds.read2Coordinate, readEnds2.read2Coordinate);
            }
            if (result == 0) {
                // File indexes are longs; compare them without narrowing.
                result = Long.compare(readEnds.read1IndexInFile, readEnds2.read1IndexInFile);
            }
            if (result == 0) {
                result = Long.compare(readEnds.read2IndexInFile, readEnds2.read2IndexInFile);
            }
            return result;
        }
    }

    /**
     * Command-line entry point: runs the program and exits the JVM with the
     * status code returned by {@code instanceMain}.
     */
    public static void main(String[] strArr) {
        MarkDuplicates program = new MarkDuplicates();
        int exitCode = program.instanceMain(strArr);
        System.exit(exitCode);
    }

    /**
     * Runs the whole duplicate-marking pipeline.
     *
     * <ol>
     *   <li>Validates the input/output/metrics files and compiles the read-name regex.</li>
     *   <li>Builds the sorted read-end collections and derives the sorted list of
     *       duplicate record indexes.</li>
     *   <li>Re-reads the input, sets or clears the duplicate flag on every record,
     *       accumulates per-library metrics, and writes the records out (skipping
     *       duplicates when REMOVE_DUPLICATES is set).</li>
     *   <li>Finalises the per-library metrics and writes the metrics file.</li>
     * </ol>
     *
     * @return 0 on completion
     */
    @Override // net.sf.picard.cmdline.CommandLineProgram
    protected int doWork() {
        IoUtil.assertFileIsReadable(this.INPUT);
        IoUtil.assertFileIsWritable(this.OUTPUT);
        IoUtil.assertFileIsWritable(this.METRICS_FILE);
        if (this.READ_NAME_REGEX != null) {
            this.READ_NAME_PATTERN = Pattern.compile(this.READ_NAME_REGEX);
        }
        reportMemoryStats("Start of doWork");
        this.log.info("Reading input file and constructing read end information.");
        buildSortedReadEndLists();
        reportMemoryStats("After buildSortedReadEndLists");
        generateDuplicateIndexes();
        reportMemoryStats("After generateDuplicateIndexes");
        this.log.info("Marking " + this.numDuplicateIndices + " records as duplicates.");
        this.log.info("Found " + ((long) this.opticalDupesByLibraryId.getSumOfValues()) + " optical duplicate clusters.");

        Map<String, DuplicationMetrics> metricsByLibrary = new HashMap<>();
        SAMFileReader reader = new SAMFileReader(this.INPUT);
        try {
            SAMFileHeader header = reader.getFileHeader();
            // NOTE(review): m229clone()/iterator2() are the names present in this
            // code base (they look decompiler-assigned); left untouched on purpose.
            SAMFileHeader outputHeader = header.m229clone();
            outputHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
            SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader, true, this.OUTPUT);

            long recordIndex = 0;
            // -1 never equals a record index, so it means "no more duplicates".
            long nextDuplicateIndex = this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : -1L;
            Iterator<SAMRecord> records = reader.iterator2();
            while (records.hasNext()) {
                SAMRecord record = records.next();
                String libraryName = getLibraryName(header, record);
                DuplicationMetrics metrics = metricsByLibrary.get(libraryName);
                if (metrics == null) {
                    metrics = new DuplicationMetrics();
                    metrics.LIBRARY = libraryName;
                    metricsByLibrary.put(libraryName, metrics);
                }

                // The mate flag is only consulted for paired reads.
                boolean pairedWithMappedMate = record.getReadPairedFlag() && !record.getMateUnmappedFlag();
                if (record.getReadUnmappedFlag()) {
                    metrics.UNMAPPED_READS++;
                } else if (pairedWithMappedMate) {
                    metrics.READ_PAIRS_EXAMINED++;
                } else {
                    metrics.UNPAIRED_READS_EXAMINED++;
                }

                if (recordIndex == nextDuplicateIndex) {
                    record.setDuplicateReadFlag(true);
                    if (pairedWithMappedMate) {
                        metrics.READ_PAIR_DUPLICATES++;
                    } else {
                        metrics.UNPAIRED_READ_DUPLICATES++;
                    }
                    nextDuplicateIndex = this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : -1L;
                } else {
                    record.setDuplicateReadFlag(false);
                }
                recordIndex++;

                if (!this.REMOVE_DUPLICATES || !record.getDuplicateReadFlag()) {
                    writer.addAlignment(record);
                }
            }
            reportMemoryStats("Before output close");
            writer.close();
            reportMemoryStats("After output close");
        } finally {
            // Previously this reader was never closed.
            reader.close();
        }

        MetricsFile metricsFile = getMetricsFile();
        for (Map.Entry<String, DuplicationMetrics> entry : metricsByLibrary.entrySet()) {
            DuplicationMetrics metrics = entry.getValue();
            // Pair counters were incremented once per read, i.e. twice per pair.
            metrics.READ_PAIRS_EXAMINED /= 2;
            metrics.READ_PAIR_DUPLICATES /= 2;
            // A library never assigned an id has no id entry; skip the histogram
            // lookup in that case instead of querying it with a null key.
            Short libraryId = this.libraryIds.get(entry.getKey());
            Histogram.Bin bin = libraryId == null ? null : this.opticalDupesByLibraryId.get(libraryId);
            if (bin != null) {
                metrics.READ_PAIR_OPTICAL_DUPLICATES = (long) bin.getValue();
            }
            metrics.calculateDerivedMetrics();
            metricsFile.addMetric(metrics);
        }
        // The ROI histogram is only attached when exactly one library is present.
        if (metricsByLibrary.size() == 1) {
            metricsFile.setHistogram(metricsByLibrary.values().iterator().next().calculateRoiHistogram());
        }
        metricsFile.write(this.METRICS_FILE);
        return 0;
    }

    /**
     * Requests a garbage collection and then logs the JVM's free, total and
     * maximum memory, prefixed with the supplied label.
     *
     * @param str label identifying the point in the run being reported
     */
    private void reportMemoryStats(String str) {
        System.gc();
        Runtime jvm = Runtime.getRuntime();
        StringBuilder message = new StringBuilder();
        message.append(str);
        message.append(" freeMemory: ").append(jvm.freeMemory());
        message.append("; totalMemory: ").append(jvm.totalMemory());
        message.append("; maxMemory: ").append(jvm.maxMemory());
        this.log.info(message.toString());
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r3v20 */
    /* JADX WARN: Type inference failed for: r3v21 */
    /* JADX WARN: Type inference failed for: r3v22 */
    /* JADX WARN: Type inference failed for: r3v27 */
    /* JADX WARN: Type inference failed for: r3v29 */
    /* JADX WARN: Type inference failed for: r3v30 */
    /* JADX WARN: Type inference failed for: r3v31 */
    /* JADX WARN: Type inference failed for: r3v32 */
    /* JADX WARN: Type inference failed for: r3v33 */
    /* JADX WARN: Type inference failed for: r3v34 */
    /* JADX WARN: Type inference failed for: r3v5 */
    /* JADX WARN: Type inference failed for: r3v6 */
    private void buildSortedReadEndLists() {
        int maxMemory = (int) ((Runtime.getRuntime().maxMemory() * 0.25d) / 63.0d);
        this.log.info("Will retain up to " + maxMemory + " data points before spilling to disk.");
        this.pairSort = SortingCollection.newInstance(ReadEnds.class, new ReadEndsCodec(), new ReadEndsComparator(), maxMemory);
        ReadEndsCodec readEndsCodec = new ReadEndsCodec();
        ReadEndsComparator readEndsComparator = new ReadEndsComparator();
        this.fragSort = SortingCollection.newInstance(ReadEnds.class, readEndsCodec, readEndsComparator, maxMemory);
        SAMFileReader sAMFileReader = new SAMFileReader(this.INPUT);
        SAMFileHeader fileHeader = sAMFileReader.getFileHeader();
        ?? r3 = readEndsComparator;
        if (fileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            if (!this.ASSUME_SORTED) {
                throw new PicardException(this.INPUT + " is not coordinate sorted.");
            }
            r3 = 0;
            this.log.info("Assuming input is coordinate sorted.");
        }
        ReadEndsMap rAMReadEndsMap = fileHeader.getSequenceDictionary().getSequences().size() > MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP ? new RAMReadEndsMap() : new DiskReadEndsMap();
        long j = 0;
        Iterator<SAMRecord> iterator2 = sAMFileReader.iterator2();
        while (iterator2.hasNext()) {
            SAMRecord next = iterator2.next();
            if (next.getReadUnmappedFlag()) {
                r3 = r3;
                if (next.getReferenceIndex().intValue() == -1) {
                    break;
                }
            } else {
                SAMRecord sAMRecord = next;
                ReadEnds buildReadEnds = buildReadEnds(fileHeader, j, sAMRecord);
                this.fragSort.add(buildReadEnds);
                r3 = sAMRecord;
                if (next.getReadPairedFlag()) {
                    r3 = sAMRecord;
                    if (!next.getMateUnmappedFlag()) {
                        String str = next.getAttribute(ReservedTagConstants.READ_GROUP_ID) + ":" + next.getReadName();
                        ReadEnds remove = rAMReadEndsMap.remove(next.getReferenceIndex().intValue(), str);
                        if (remove == null) {
                            ReadEnds buildReadEnds2 = buildReadEnds(fileHeader, j, next);
                            ReadEnds readEnds = buildReadEnds2;
                            rAMReadEndsMap.put(buildReadEnds2.read2Sequence, str, readEnds);
                            r3 = readEnds;
                        } else {
                            int i = buildReadEnds.read1Sequence;
                            int i2 = buildReadEnds.read1Coordinate;
                            if (i > remove.read1Sequence || (i == remove.read1Sequence && i2 >= remove.read1Coordinate)) {
                                remove.read2Sequence = i;
                                remove.read2Coordinate = i2;
                                remove.read2IndexInFile = j;
                                remove.orientation = getOrientationByte(remove.orientation == 1, next.getReadNegativeStrandFlag());
                            } else {
                                remove.read2Sequence = remove.read1Sequence;
                                remove.read2Coordinate = remove.read1Coordinate;
                                remove.read2IndexInFile = remove.read1IndexInFile;
                                remove.read1Sequence = i;
                                remove.read1Coordinate = i2;
                                remove.read1IndexInFile = j;
                                remove.orientation = getOrientationByte(next.getReadNegativeStrandFlag(), remove.orientation == 1);
                            }
                            SAMRecord sAMRecord2 = next;
                            remove.score = (short) (remove.score + getScore(sAMRecord2));
                            this.pairSort.add(remove);
                            r3 = sAMRecord2;
                        }
                    }
                }
            }
            long j2 = j + 1;
            j = r3 == true ? 1 : 0;
            if (j2 % 1000000 == 0) {
                r3 = 0;
                this.log.info("Read " + j + " records. Tracking " + rAMReadEndsMap.size() + " as yet unmatched pairs. " + rAMReadEndsMap.sizeInRam() + " records in RAM.  Last sequence index: " + next.getReferenceIndex());
            }
        }
        this.log.info("Read " + j + " records. " + rAMReadEndsMap.size() + " pairs never matched.");
        sAMFileReader.close();
        this.pairSort.doneAdding();
        this.fragSort.doneAdding();
    }

    /**
     * Creates the ReadEnds describing a single mapped record.
     *
     * <p>Populates the read1 sequence/coordinate (unclipped end for reverse-strand
     * reads, unclipped start otherwise), orientation, file index, score and
     * library id. For paired reads with a mapped mate the mate's reference index
     * is stored as read2Sequence. When a read-name pattern is configured and the
     * read name matches it, tile/x/y are taken from the three capture groups and
     * readGroup is set to the position of the record's read group in the header
     * list.</p>
     *
     * @param sAMFileHeader header used to resolve the library and read groups
     * @param j             index of the record in the input file
     * @param sAMRecord     the record to describe
     * @return the populated ReadEnds
     */
    private ReadEnds buildReadEnds(SAMFileHeader sAMFileHeader, long j, SAMRecord sAMRecord) {
        ReadEnds ends = new ReadEnds();
        ends.read1Sequence = sAMRecord.getReferenceIndex().intValue();
        if (sAMRecord.getReadNegativeStrandFlag()) {
            ends.read1Coordinate = sAMRecord.getUnclippedEnd();
        } else {
            ends.read1Coordinate = sAMRecord.getUnclippedStart();
        }
        if (sAMRecord.getReadNegativeStrandFlag()) {
            ends.orientation = (byte) 1;
        } else {
            ends.orientation = (byte) 0;
        }
        ends.read1IndexInFile = j;
        ends.score = getScore(sAMRecord);
        if (sAMRecord.getReadPairedFlag() && !sAMRecord.getMateUnmappedFlag()) {
            ends.read2Sequence = sAMRecord.getMateReferenceIndex().intValue();
        }
        ends.libraryId = getLibraryId(sAMFileHeader, sAMRecord);

        // Without a pattern, or with a non-matching name, no location data is set.
        if (this.READ_NAME_PATTERN == null) {
            return ends;
        }
        Matcher nameMatcher = this.READ_NAME_PATTERN.matcher(sAMRecord.getReadName());
        if (!nameMatcher.matches()) {
            return ends;
        }

        ends.tile = (byte) Integer.parseInt(nameMatcher.group(1));
        ends.x = (short) Integer.parseInt(nameMatcher.group(2));
        ends.y = (short) Integer.parseInt(nameMatcher.group(3));
        ends.readGroup = (short) 0;

        String readGroupId = (String) sAMRecord.getAttribute(StandardOptionDefinitions.READ_GROUP_ID_SHORT_NAME);
        List<SAMReadGroupRecord> headerGroups = sAMFileHeader.getReadGroups();
        if (readGroupId == null || headerGroups == null) {
            return ends;
        }
        // Count the header read groups that precede the record's own group.
        for (SAMReadGroupRecord group : headerGroups) {
            if (group.getReadGroupId().equals(readGroupId)) {
                break;
            }
            ends.readGroup = (short) (ends.readGroup + 1);
        }
        return ends;
    }

    /**
     * Returns the numeric id for the record's library, assigning the next free
     * id the first time a library name is seen.
     *
     * @param sAMFileHeader header used to resolve the library name
     * @param sAMRecord     record whose library is wanted
     * @return the id associated with the record's library name
     */
    private short getLibraryId(SAMFileHeader sAMFileHeader, SAMRecord sAMRecord) {
        String name = getLibraryName(sAMFileHeader, sAMRecord);
        Short known = this.libraryIds.get(name);
        if (known != null) {
            return known.shortValue();
        }
        short assigned = this.nextLibraryId;
        this.nextLibraryId = (short) (assigned + 1);
        this.libraryIds.put(name, Short.valueOf(assigned));
        return assigned;
    }

    /**
     * Resolves the library name for a record through its read group.
     *
     * <p>Falls back to "Unknown Library" when the record has no read group
     * attribute, when the header has no such read group, or when the read group
     * does not declare a library. The last case previously returned
     * {@code null}, which ended up as a null map key and a null LIBRARY value
     * in the metrics.</p>
     *
     * @param sAMFileHeader header containing the read group definitions
     * @param sAMRecord     record whose library is wanted
     * @return the library name, never {@code null}
     */
    private String getLibraryName(SAMFileHeader sAMFileHeader, SAMRecord sAMRecord) {
        String readGroupId = (String) sAMRecord.getAttribute(StandardOptionDefinitions.READ_GROUP_ID_SHORT_NAME);
        if (readGroupId != null) {
            SAMReadGroupRecord readGroup = sAMFileHeader.getReadGroup(readGroupId);
            if (readGroup != null) {
                String library = readGroup.getLibrary();
                if (library != null) {
                    return library;
                }
            }
        }
        return "Unknown Library";
    }

    /**
     * Encodes the pair of flags as a single orientation byte:
     * (false,false)=2, (false,true)=3, (true,true)=4, (true,false)=5.
     * Callers pass the negative-strand state of the first and second end.
     *
     * @param z  flag for the first end
     * @param z2 flag for the second end
     * @return the orientation code
     */
    private byte getOrientationByte(boolean z, boolean z2) {
        if (z) {
            if (z2) {
                return (byte) 4;
            }
            return (byte) 5;
        }
        if (z2) {
            return (byte) 3;
        }
        return (byte) 2;
    }

    /**
     * Computes a record's score: the sum of its base qualities that are at
     * least 15.
     *
     * <p>The accumulator is a {@code short}, matching the return type; the
     * previous {@code byte} accumulator was a type-inference failure that did
     * not compile and would have overflowed at 127.</p>
     *
     * @param sAMRecord record whose base qualities are summed
     * @return the score, narrowed to {@code short}
     */
    private short getScore(SAMRecord sAMRecord) {
        short score = 0;
        for (byte quality : sAMRecord.getBaseQualities()) {
            if (quality >= 15) {
                score = (short) (score + quality);
            }
        }
        return score;
    }

    /**
     * Walks the sorted pair and fragment collections, groups consecutive entries
     * that are comparable for duplication, and records the file indexes of the
     * duplicates in {@code duplicateIndexes}. The index collection is switched to
     * iteration mode before returning.
     */
    private void generateDuplicateIndexes() {
        int maxInMemory = (int) ((Runtime.getRuntime().maxMemory() * 0.25d) / 8.0d);
        this.log.info("Will retain up to " + maxInMemory + " duplicate indices before spilling to disk.");
        this.duplicateIndexes = new SortingLongCollection(maxInMemory, this.TMP_DIR);

        ReadEnds firstOfChunk = null;
        // Initial capacity only; it was previously taken from an unrelated
        // BioJava constant.
        List<ReadEnds> chunk = new ArrayList<ReadEnds>(200);

        this.log.info("Traversing read pair information and detecting duplicates.");
        Iterator<ReadEnds> pairs = this.pairSort.iterator();
        while (pairs.hasNext()) {
            ReadEnds current = pairs.next();
            if (firstOfChunk == null) {
                firstOfChunk = current;
                chunk.add(firstOfChunk);
            } else if (areComparableForDuplicates(firstOfChunk, current, true)) {
                chunk.add(current);
            } else {
                if (chunk.size() > 1) {
                    markDuplicatePairs(chunk);
                }
                chunk.clear();
                chunk.add(current);
                firstOfChunk = current;
            }
        }
        markDuplicatePairs(chunk);
        this.pairSort = null;

        this.log.info("Traversing fragment information and detecting duplicates.");
        // The chunk state deliberately carries over from the pair pass; the first
        // fragment starts a fresh chunk because containsFrags is still false.
        boolean containsPairs = false;
        boolean containsFrags = false;
        Iterator<ReadEnds> fragments = this.fragSort.iterator();
        while (fragments.hasNext()) {
            ReadEnds current = fragments.next();
            if (firstOfChunk != null && areComparableForDuplicates(firstOfChunk, current, false)) {
                chunk.add(current);
                containsPairs = containsPairs || current.isPaired();
                containsFrags = containsFrags || !current.isPaired();
            } else {
                // Only chunks holding at least one unpaired fragment can yield
                // fragment duplicates.
                if (chunk.size() > 1 && containsFrags) {
                    markDuplicateFragments(chunk, containsPairs);
                }
                chunk.clear();
                chunk.add(current);
                firstOfChunk = current;
                containsPairs = current.isPaired();
                containsFrags = !current.isPaired();
            }
        }
        markDuplicateFragments(chunk, containsPairs);
        this.fragSort = null;

        this.log.info("Sorting list of duplicate records.");
        this.duplicateIndexes.doneAddingStartIteration();
    }

    /**
     * Tells whether two ReadEnds belong to the same duplicate group: same
     * library, same read1 sequence and coordinate, same orientation and, when
     * {@code z} is true, the same read2 sequence and coordinate as well.
     *
     * @param readEnds  first entry
     * @param readEnds2 second entry
     * @param z         whether the second end must match too
     * @return true when the two entries are comparable for duplication
     */
    private boolean areComparableForDuplicates(ReadEnds readEnds, ReadEnds readEnds2, boolean z) {
        if (readEnds.libraryId != readEnds2.libraryId) {
            return false;
        }
        if (readEnds.read1Sequence != readEnds2.read1Sequence) {
            return false;
        }
        if (readEnds.read1Coordinate != readEnds2.read1Coordinate) {
            return false;
        }
        if (readEnds.orientation != readEnds2.orientation) {
            return false;
        }
        if (!z) {
            return true;
        }
        return readEnds.read2Sequence == readEnds2.read2Sequence
                && readEnds.read2Coordinate == readEnds2.read2Coordinate;
    }

    /**
     * Records a file index as belonging to a duplicate record and bumps the
     * running duplicate count.
     *
     * @param j index in the input file of the record to flag
     */
    private void addIndexAsDuplicate(long j) {
        this.duplicateIndexes.add(j);
        this.numDuplicateIndices += 1;
    }

    /**
     * Keeps the highest-scoring pair of the group (the earliest one on ties) and
     * flags both reads of every other pair as duplicates, then updates the
     * optical-duplicate tally for the group.
     *
     * @param list pairs that are comparable for duplication
     */
    private void markDuplicatePairs(List<ReadEnds> list) {
        ReadEnds best = null;
        short bestScore = 0;
        for (ReadEnds candidate : list) {
            if (best == null || candidate.score > bestScore) {
                best = candidate;
                bestScore = candidate.score;
            }
        }

        for (ReadEnds candidate : list) {
            if (candidate == best) {
                continue;
            }
            addIndexAsDuplicate(candidate.read1IndexInFile);
            addIndexAsDuplicate(candidate.read2IndexInFile);
        }

        trackOpticalDuplicates(list);
    }

    /**
     * Counts the entries of the group that lie within
     * OPTICAL_DUPLICATE_PIXEL_DISTANCE (in both x and y) of an earlier entry with
     * the same read group and the same non-negative tile, and adds that count to
     * the histogram under the library id of the first entry.
     *
     * @param list group of entries that are duplicates of one another
     */
    private void trackOpticalDuplicates(List<ReadEnds> list) {
        final int count = list.size();
        boolean[] flagged = new boolean[list.size()];
        int opticalDuplicates = 0;

        for (int first = 0; first < count; first++) {
            ReadEnds a = list.get(first);
            for (int second = first + 1; second < count; second++) {
                ReadEnds b = list.get(second);
                if (a.readGroup != b.readGroup || a.tile < 0 || a.tile != b.tile) {
                    continue;
                }
                int deltaX = Math.abs(a.x - b.x);
                int deltaY = Math.abs(a.y - b.y);
                if (deltaX > this.OPTICAL_DUPLICATE_PIXEL_DISTANCE || deltaY > this.OPTICAL_DUPLICATE_PIXEL_DISTANCE) {
                    continue;
                }
                // Each entry is counted once, however many neighbours it has.
                if (!flagged[second]) {
                    flagged[second] = true;
                    opticalDuplicates++;
                }
            }
        }

        if (opticalDuplicates > 0) {
            this.opticalDupesByLibraryId.increment(Short.valueOf(list.get(0).libraryId), opticalDuplicates);
        }
    }

    /**
     * Flags duplicate fragments within a group.
     *
     * <p>When {@code z} is true, every entry that is not paired is flagged.
     * Otherwise the highest-scoring entry is kept (the earliest one on ties)
     * and all others are flagged.</p>
     *
     * @param list entries that are comparable for duplication
     * @param z    whether the group contains at least one paired entry
     */
    private void markDuplicateFragments(List<ReadEnds> list, boolean z) {
        if (!z) {
            ReadEnds best = null;
            short bestScore = 0;
            for (ReadEnds candidate : list) {
                if (best == null || candidate.score > bestScore) {
                    best = candidate;
                    bestScore = candidate.score;
                }
            }
            for (ReadEnds candidate : list) {
                if (candidate != best) {
                    addIndexAsDuplicate(candidate.read1IndexInFile);
                }
            }
            return;
        }

        for (ReadEnds candidate : list) {
            if (candidate.isPaired()) {
                continue;
            }
            addIndexAsDuplicate(candidate.read1IndexInFile);
        }
    }
}
