001/* 002 * (C) Copyright 2015 Nuxeo SA (http://nuxeo.com/) and contributors. 003 * 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the GNU Lesser General Public License 006 * (LGPL) version 2.1 which accompanies this distribution, and is available at 007 * http://www.gnu.org/licenses/lgpl-2.1.html 008 * 009 * This library is distributed in the hope that it will be useful, 010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 012 * Lesser General Public License for more details. 013 * 014 * Contributors: 015 * Nuxeo 016 */ 017 018package org.nuxeo.ecm.blob.azure; 019 020import java.net.URISyntaxException; 021import java.util.EnumSet; 022import java.util.HashSet; 023import java.util.Set; 024import java.util.regex.Pattern; 025 026import org.apache.commons.logging.Log; 027import org.apache.commons.logging.LogFactory; 028import org.nuxeo.ecm.blob.AbstractBinaryGarbageCollector; 029 030import com.microsoft.azure.storage.ResultContinuation; 031import com.microsoft.azure.storage.ResultSegment; 032import com.microsoft.azure.storage.StorageException; 033import com.microsoft.azure.storage.blob.BlobListingDetails; 034import com.microsoft.azure.storage.blob.CloudBlockBlob; 035import com.microsoft.azure.storage.blob.ListBlobItem; 036 037/** 038 * @author <a href="mailto:ak@nuxeo.com">Arnaud Kervern</a> 039 * @since 7.10 040 */ 041public class AzureGarbageCollector extends AbstractBinaryGarbageCollector<AzureBinaryManager> { 042 043 private static final Log log = LogFactory.getLog(AzureGarbageCollector.class); 044 045 private static final Pattern MD5_RE = Pattern.compile("(.*/)?[0-9a-f]{32}"); 046 047 public AzureGarbageCollector(AzureBinaryManager binaryManager) { 048 super(binaryManager); 049 } 050 051 @Override 052 public String getId() { 053 return "azure:" + binaryManager.container.getName(); 054 } 055 056 @Override 057 public Set<String> getUnmarkedBlobs() { 058 Set<String> unmarked = new HashSet<>(); 059 ResultContinuation continuationToken = null; 060 ResultSegment<ListBlobItem> lbs; 061 do { 062 try { 063 lbs = binaryManager.container.listBlobsSegmented(null, false, EnumSet.noneOf(BlobListingDetails.class), 064 null, continuationToken, null, null); 065 } catch (StorageException e) { 066 throw new RuntimeException(e); 067 } 068 069 for (ListBlobItem item : lbs.getResults()) { 070 071 if (!(item instanceof CloudBlockBlob)) { 072 // ignore wrong blob type 073 continue; 074 } 075 076 CloudBlockBlob blob = (CloudBlockBlob) item; 077 078 String digest; 079 try { 080 digest = blob.getName(); 081 } catch (URISyntaxException e) { 082 // Should never happends 083 // @see com.microsoft.azure.storage.blob.CloudBlob.getName() 084 continue; 085 } 086 087 if (!isMD5(digest)) { 088 // ignore files that cannot be MD5 digests for 089 // safety 090 continue; 091 } 092 093 long length = blob.getProperties().getLength(); 094 if (marked.contains(digest)) { 095 status.numBinaries++; 096 status.sizeBinaries += length; 097 marked.remove(digest); // optimize memory 098 } else { 099 status.numBinariesGC++; 100 status.sizeBinariesGC += length; 101 // record file to delete 102 unmarked.add(digest); 103 } 104 } 105 106 continuationToken = lbs.getContinuationToken(); 107 } while (lbs.getHasMoreResults()); 108 marked = null; // help GC 109 110 return unmarked; 111 } 112 113 public static boolean isMD5(String digest) { 114 return MD5_RE.matcher(digest).matches(); 115 } 116}