001/*
002 * (C) Copyright 2015 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo
018 */
019
020package org.nuxeo.ecm.blob.azure;
021
022import java.net.URISyntaxException;
023import java.util.EnumSet;
024import java.util.HashSet;
025import java.util.Set;
026import java.util.regex.Pattern;
027
028import org.apache.commons.logging.Log;
029import org.apache.commons.logging.LogFactory;
030import org.nuxeo.ecm.blob.AbstractBinaryGarbageCollector;
031
032import com.microsoft.azure.storage.ResultContinuation;
033import com.microsoft.azure.storage.ResultSegment;
034import com.microsoft.azure.storage.StorageException;
035import com.microsoft.azure.storage.blob.BlobListingDetails;
036import com.microsoft.azure.storage.blob.CloudBlockBlob;
037import com.microsoft.azure.storage.blob.ListBlobItem;
038
039/**
040 * @author <a href="mailto:ak@nuxeo.com">Arnaud Kervern</a>
041 * @since 7.10
042 */
043public class AzureGarbageCollector extends AbstractBinaryGarbageCollector<AzureBinaryManager> {
044
045    private static final Log log = LogFactory.getLog(AzureGarbageCollector.class);
046
047    private static final Pattern MD5_RE = Pattern.compile("[0-9a-f]{32}");
048
049    public AzureGarbageCollector(AzureBinaryManager binaryManager) {
050        super(binaryManager);
051    }
052
053    @Override
054    public String getId() {
055        return "azure:" + binaryManager.container.getName();
056    }
057
058    @Override
059    public Set<String> getUnmarkedBlobs() {
060        Set<String> unmarked = new HashSet<>();
061        ResultContinuation continuationToken = null;
062        ResultSegment<ListBlobItem> lbs;
063        do {
064            try {
065                lbs = binaryManager.container.listBlobsSegmented(binaryManager.prefix, false,
066                        EnumSet.noneOf(BlobListingDetails.class), null, continuationToken, null, null);
067            } catch (StorageException e) {
068                throw new RuntimeException(e);
069            }
070
071            for (ListBlobItem item : lbs.getResults()) {
072
073                if (!(item instanceof CloudBlockBlob)) {
074                    // ignore subdirectories
075                    continue;
076                }
077
078                CloudBlockBlob blob = (CloudBlockBlob) item;
079
080                String digest;
081                try {
082                    String name = blob.getName();
083                    digest = name.substring(binaryManager.prefix.length());
084                } catch (URISyntaxException e) {
085                    // Should never happends
086                    // @see com.microsoft.azure.storage.blob.CloudBlob.getName()
087                    continue;
088                }
089
090                if (!isMD5(digest)) {
091                    // ignore files that cannot be MD5 digests for
092                    // safety
093                    continue;
094                }
095
096                long length = blob.getProperties().getLength();
097                if (marked.contains(digest)) {
098                    status.numBinaries++;
099                    status.sizeBinaries += length;
100                    marked.remove(digest); // optimize memory
101                } else {
102                    status.numBinariesGC++;
103                    status.sizeBinariesGC += length;
104                    // record file to delete
105                    unmarked.add(digest);
106                }
107            }
108
109            continuationToken = lbs.getContinuationToken();
110        } while (lbs.getHasMoreResults());
111        marked = null; // help GC
112
113        return unmarked;
114    }
115
116    public static boolean isMD5(String digest) {
117        return MD5_RE.matcher(digest).matches();
118    }
119}