001/*
002 * (C) Copyright 2015 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo
018 */
019
020package org.nuxeo.ecm.blob.azure;
021
022import java.net.URISyntaxException;
023import java.util.EnumSet;
024import java.util.HashSet;
025import java.util.Set;
026import java.util.regex.Pattern;
027
028import org.apache.commons.logging.Log;
029import org.apache.commons.logging.LogFactory;
030import org.nuxeo.ecm.blob.AbstractBinaryGarbageCollector;
031
032import com.microsoft.azure.storage.ResultContinuation;
033import com.microsoft.azure.storage.ResultSegment;
034import com.microsoft.azure.storage.StorageException;
035import com.microsoft.azure.storage.blob.BlobListingDetails;
036import com.microsoft.azure.storage.blob.CloudBlockBlob;
037import com.microsoft.azure.storage.blob.ListBlobItem;
038
039/**
040 * @author <a href="mailto:ak@nuxeo.com">Arnaud Kervern</a>
041 * @since 7.10
042 */
043public class AzureGarbageCollector extends AbstractBinaryGarbageCollector<AzureBinaryManager> {
044
045    private static final Log log = LogFactory.getLog(AzureGarbageCollector.class);
046
047    private static final Pattern MD5_RE = Pattern.compile("(.*/)?[0-9a-f]{32}");
048
049    public AzureGarbageCollector(AzureBinaryManager binaryManager) {
050        super(binaryManager);
051    }
052
053    @Override
054    public String getId() {
055        return "azure:" + binaryManager.container.getName();
056    }
057
058    @Override
059    public Set<String> getUnmarkedBlobs() {
060        Set<String> unmarked = new HashSet<>();
061        ResultContinuation continuationToken = null;
062        ResultSegment<ListBlobItem> lbs;
063        do {
064            try {
065                lbs = binaryManager.container.listBlobsSegmented(null, false, EnumSet.noneOf(BlobListingDetails.class),
066                        null, continuationToken, null, null);
067            } catch (StorageException e) {
068                throw new RuntimeException(e);
069            }
070
071            for (ListBlobItem item : lbs.getResults()) {
072
073                if (!(item instanceof CloudBlockBlob)) {
074                    // ignore wrong blob type
075                    continue;
076                }
077
078                CloudBlockBlob blob = (CloudBlockBlob) item;
079
080                String digest;
081                try {
082                    digest = blob.getName();
083                } catch (URISyntaxException e) {
084                    // Should never happends
085                    // @see com.microsoft.azure.storage.blob.CloudBlob.getName()
086                    continue;
087                }
088
089                if (!isMD5(digest)) {
090                    // ignore files that cannot be MD5 digests for
091                    // safety
092                    continue;
093                }
094
095                long length = blob.getProperties().getLength();
096                if (marked.contains(digest)) {
097                    status.numBinaries++;
098                    status.sizeBinaries += length;
099                    marked.remove(digest); // optimize memory
100                } else {
101                    status.numBinariesGC++;
102                    status.sizeBinariesGC += length;
103                    // record file to delete
104                    unmarked.add(digest);
105                }
106            }
107
108            continuationToken = lbs.getContinuationToken();
109        } while (lbs.getHasMoreResults());
110        marked = null; // help GC
111
112        return unmarked;
113    }
114
115    public static boolean isMD5(String digest) {
116        return MD5_RE.matcher(digest).matches();
117    }
118}