001/* 002 * (C) Copyright 2015 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Nuxeo 018 */ 019 020package org.nuxeo.ecm.blob.azure; 021 022import java.net.URISyntaxException; 023import java.util.EnumSet; 024import java.util.HashSet; 025import java.util.Set; 026import java.util.regex.Pattern; 027 028import org.apache.commons.logging.Log; 029import org.apache.commons.logging.LogFactory; 030import org.nuxeo.ecm.blob.AbstractBinaryGarbageCollector; 031 032import com.microsoft.azure.storage.ResultContinuation; 033import com.microsoft.azure.storage.ResultSegment; 034import com.microsoft.azure.storage.StorageException; 035import com.microsoft.azure.storage.blob.BlobListingDetails; 036import com.microsoft.azure.storage.blob.CloudBlockBlob; 037import com.microsoft.azure.storage.blob.ListBlobItem; 038 039/** 040 * @author <a href="mailto:ak@nuxeo.com">Arnaud Kervern</a> 041 * @since 7.10 042 */ 043public class AzureGarbageCollector extends AbstractBinaryGarbageCollector<AzureBinaryManager> { 044 045 private static final Log log = LogFactory.getLog(AzureGarbageCollector.class); 046 047 private static final Pattern MD5_RE = Pattern.compile("(.*/)?[0-9a-f]{32}"); 048 049 public AzureGarbageCollector(AzureBinaryManager binaryManager) { 050 super(binaryManager); 051 } 052 053 @Override 054 public String getId() { 055 return "azure:" + binaryManager.container.getName(); 056 } 057 058 @Override 059 public Set<String> getUnmarkedBlobs() { 060 Set<String> unmarked = new HashSet<>(); 061 ResultContinuation continuationToken = null; 062 ResultSegment<ListBlobItem> lbs; 063 do { 064 try { 065 lbs = binaryManager.container.listBlobsSegmented(null, false, EnumSet.noneOf(BlobListingDetails.class), 066 null, continuationToken, null, null); 067 } catch (StorageException e) { 068 throw new RuntimeException(e); 069 } 070 071 for (ListBlobItem item : lbs.getResults()) { 072 073 if (!(item instanceof CloudBlockBlob)) { 074 // ignore wrong blob type 075 continue; 076 } 077 078 CloudBlockBlob blob = (CloudBlockBlob) item; 079 080 String digest; 081 try { 082 digest = blob.getName(); 083 } catch (URISyntaxException e) { 084 // Should never happends 085 // @see com.microsoft.azure.storage.blob.CloudBlob.getName() 086 continue; 087 } 088 089 if (!isMD5(digest)) { 090 // ignore files that cannot be MD5 digests for 091 // safety 092 continue; 093 } 094 095 long length = blob.getProperties().getLength(); 096 if (marked.contains(digest)) { 097 status.numBinaries++; 098 status.sizeBinaries += length; 099 marked.remove(digest); // optimize memory 100 } else { 101 status.numBinariesGC++; 102 status.sizeBinariesGC += length; 103 // record file to delete 104 unmarked.add(digest); 105 } 106 } 107 108 continuationToken = lbs.getContinuationToken(); 109 } while (lbs.getHasMoreResults()); 110 marked = null; // help GC 111 112 return unmarked; 113 } 114 115 public static boolean isMD5(String digest) { 116 return MD5_RE.matcher(digest).matches(); 117 } 118}