001/*
002 * (C) Copyright 2006-2011 Nuxeo SA (http://nuxeo.com/) and contributors.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.common.file;
020
021import java.io.File;
022import java.io.FileOutputStream;
023import java.io.IOException;
024import java.io.InputStream;
025import java.util.HashMap;
026import java.util.LinkedList;
027import java.util.Map;
028import java.util.regex.Pattern;
029
030import org.apache.commons.collections.map.ReferenceMap;
031import org.apache.commons.io.FileCleaningTracker;
032import org.apache.commons.io.IOUtils;
033
034/**
035 * A LRU cache of {@link File}s with capped filesystem size.
036 * <p>
037 * When a new file is put in the cache, if the total size becomes more that the maximum size then least recently access
038 * entries are removed until the new file fits.
039 * <p>
040 * A file will never be actually removed from the filesystem while the File object returned by {@link #getFile} is still
041 * referenced.
042 * <p>
043 * The cache keys are restricted to a subset of ASCII: letters, digits and dashes. Usually a MD5 or SHA1 hash is used.
044 */
045public class LRUFileCache implements FileCache {
046
047    /** Allowed key pattern, used as file path. */
048    public static final Pattern SIMPLE_ASCII = Pattern.compile("[-_a-zA-Z0-9]+");
049
050    protected final File dir;
051
052    protected final long maxSize;
053
054    /** Cached files. */
055    protected final Map<String, LRUFileCacheEntry> cache;
056
057    /**
058     * Referenced files on the filesystem. Contains all the cached files, plus all those that have been marked for
059     * deletion but haven't been deleted yet. Because of the latter, this is a weak value map.
060     */
061    protected final Map<String, File> files;
062
063    /** Size of the cached files. */
064    protected long cacheSize;
065
066    /** Most recently used entries from the cache are first. */
067    protected final LinkedList<String> lru;
068
069    // this creates a new thread
070    private static final FileCleaningTracker fileCleaningTracker = new FileCleaningTracker();
071
072    /**
073     * In-memory entry for a cached file.
074     */
075    protected static class LRUFileCacheEntry {
076        public File file;
077
078        public long size;
079    }
080
081    /**
082     * Constructs a cache in the given directory with the given maximum size (in bytes).
083     *
084     * @param dir the directory to use to store cached files
085     * @param maxSize the maximum size of the cache (in bytes)
086     */
087    @SuppressWarnings("unchecked")
088    public LRUFileCache(File dir, long maxSize) {
089        this.dir = dir;
090        this.maxSize = maxSize;
091        cache = new HashMap<String, LRUFileCacheEntry>();
092        // use a weak reference for the values: don't hold values longer than
093        // they need to be referenced elsewhere
094        files = new ReferenceMap(ReferenceMap.HARD, ReferenceMap.WEAK);
095        lru = new LinkedList<String>();
096    }
097
098    @Override
099    public long getSize() {
100        return cacheSize;
101    }
102
103    @Override
104    public int getNumberOfItems() {
105        return lru.size();
106    }
107
108    @Override
109    public File getTempFile() throws IOException {
110        File tmp = File.createTempFile("nxbin_", null, dir);
111        tmp.deleteOnExit();
112        return tmp;
113    }
114
115    /**
116     * {@inheritDoc}
117     * <p>
118     * The key is used as a file name in the directory cache.
119     */
120    @Override
121    public synchronized File putFile(String key, InputStream in) throws IOException {
122        try {
123            // check the cache
124            LRUFileCacheEntry entry = cache.get(key);
125            if (entry != null) {
126                return entry.file;
127            }
128
129            // maybe the cache entry was just deleted but the file is still
130            // there?
131            File file = files.get(key);
132            if (file != null) {
133                // use not-yet-deleted file
134                return putFileInCache(key, file);
135            }
136
137            // store the stream in a temporary file
138            file = getTempFile();
139            FileOutputStream out = new FileOutputStream(file);
140            try {
141                IOUtils.copy(in, out);
142            } finally {
143                out.close();
144            }
145            return putFile(key, file);
146        } finally {
147            in.close();
148        }
149    }
150
151    /**
152     * {@inheritDoc}
153     * <p>
154     * The key is used as a file name in the directory cache.
155     */
156    @Override
157    public synchronized File putFile(String key, File file) throws IllegalArgumentException, IOException {
158        // check the cache
159        LRUFileCacheEntry entry = cache.get(key);
160        if (entry != null) {
161            file.delete(); // tmp file not used
162            return entry.file;
163        }
164
165        // maybe the cache entry was just deleted but the file is still
166        // there?
167        File dest = files.get(key);
168        if (dest != null) {
169            // use not-yet-deleted file
170            return putFileInCache(key, dest);
171        }
172
173        // put file in cache with standard name
174        checkKey(key);
175        dest = new File(dir, key);
176        if (!file.renameTo(dest)) {
177            // already something there
178            file.delete();
179        }
180        return putFileInCache(key, dest);
181    }
182
183    /**
184     * Puts a file that's already in the correct filesystem location in the internal cache datastructures.
185     */
186    protected File putFileInCache(String key, File file) {
187        // remove oldest entries until size fits
188        long size = file.length();
189        ensureCapacity(size);
190
191        // put new entry in cache
192        LRUFileCacheEntry entry = new LRUFileCacheEntry();
193        entry.size = size;
194        entry.file = file;
195        add(key, entry);
196
197        return file;
198    }
199
200    protected void checkKey(String key) throws IllegalArgumentException {
201        if (!SIMPLE_ASCII.matcher(key).matches() || ".".equals(key) || "..".equals(key)) {
202            throw new IllegalArgumentException("Invalid key: " + key);
203        }
204    }
205
206    @Override
207    public synchronized File getFile(String key) {
208        // check the cache
209        LRUFileCacheEntry entry = cache.get(key);
210        if (entry != null) {
211            // note access in most recently used list
212            recordAccess(key);
213            return entry.file;
214        }
215
216        // maybe the cache entry was just deleted but the file is still
217        // there?
218        return files.get(key);
219    }
220
221    @Override
222    public synchronized void clear() {
223        for (String key : lru) {
224            remove(key);
225        }
226        lru.clear();
227        cache.clear();
228        files.clear();
229    }
230
231    protected void recordAccess(String key) {
232        lru.remove(key); // TODO does a linear scan
233        lru.addFirst(key);
234    }
235
236    protected void add(String key, LRUFileCacheEntry entry) {
237        cache.put(key, entry);
238        files.put(key, entry.file);
239        lru.addFirst(key);
240        cacheSize += entry.size;
241    }
242
243    protected void remove(String key) {
244        LRUFileCacheEntry entry = cache.remove(key);
245        // don't remove from files here, the GC will do it
246        cacheSize -= entry.size;
247        // delete file when not referenced anymore
248        fileCleaningTracker.track(entry.file, entry.file);
249    }
250
251    protected void ensureCapacity(long size) {
252        while (cacheSize + size > maxSize && !lru.isEmpty()) {
253            remove(lru.removeLast());
254        }
255    }
256
257}