/*
 * (C) Copyright 2015 Nuxeo SA (http://nuxeo.com/) and contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser General Public License
 * (LGPL) version 2.1 which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl-2.1.html
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * Contributors:
 *     Nuxeo
 */
017
018package org.nuxeo.ecm.blob.azure;
019
020import java.net.URISyntaxException;
021import java.util.EnumSet;
022import java.util.HashSet;
023import java.util.Set;
024import java.util.regex.Pattern;
025
026import org.apache.commons.logging.Log;
027import org.apache.commons.logging.LogFactory;
028import org.nuxeo.ecm.blob.AbstractBinaryGarbageCollector;
029
030import com.microsoft.azure.storage.ResultContinuation;
031import com.microsoft.azure.storage.ResultSegment;
032import com.microsoft.azure.storage.StorageException;
033import com.microsoft.azure.storage.blob.BlobListingDetails;
034import com.microsoft.azure.storage.blob.CloudBlockBlob;
035import com.microsoft.azure.storage.blob.ListBlobItem;
036
037/**
038 * @author <a href="mailto:ak@nuxeo.com">Arnaud Kervern</a>
039 * @since 7.10
040 */
041public class AzureGarbageCollector extends AbstractBinaryGarbageCollector<AzureBinaryManager> {
042
043    private static final Log log = LogFactory.getLog(AzureGarbageCollector.class);
044
045    private static final Pattern MD5_RE = Pattern.compile("(.*/)?[0-9a-f]{32}");
046
047    public AzureGarbageCollector(AzureBinaryManager binaryManager) {
048        super(binaryManager);
049    }
050
051    @Override
052    public String getId() {
053        return "azure:" + binaryManager.container.getName();
054    }
055
056    @Override
057    public Set<String> getUnmarkedBlobs() {
058        Set<String> unmarked = new HashSet<>();
059        ResultContinuation continuationToken = null;
060        ResultSegment<ListBlobItem> lbs;
061        do {
062            try {
063                lbs = binaryManager.container.listBlobsSegmented(null, false, EnumSet.noneOf(BlobListingDetails.class),
064                        null, continuationToken, null, null);
065            } catch (StorageException e) {
066                throw new RuntimeException(e);
067            }
068
069            for (ListBlobItem item : lbs.getResults()) {
070
071                if (!(item instanceof CloudBlockBlob)) {
072                    // ignore wrong blob type
073                    continue;
074                }
075
076                CloudBlockBlob blob = (CloudBlockBlob) item;
077
078                String digest;
079                try {
080                    digest = blob.getName();
081                } catch (URISyntaxException e) {
082                    // Should never happends
083                    // @see com.microsoft.azure.storage.blob.CloudBlob.getName()
084                    continue;
085                }
086
087                if (!isMD5(digest)) {
088                    // ignore files that cannot be MD5 digests for
089                    // safety
090                    continue;
091                }
092
093                long length = blob.getProperties().getLength();
094                if (marked.contains(digest)) {
095                    status.numBinaries++;
096                    status.sizeBinaries += length;
097                    marked.remove(digest); // optimize memory
098                } else {
099                    status.numBinariesGC++;
100                    status.sizeBinariesGC += length;
101                    // record file to delete
102                    unmarked.add(digest);
103                }
104            }
105
106            continuationToken = lbs.getContinuationToken();
107        } while (lbs.getHasMoreResults());
108        marked = null; // help GC
109
110        return unmarked;
111    }
112
113    public static boolean isMD5(String digest) {
114        return MD5_RE.matcher(digest).matches();
115    }
116}