001/*
002 * (C) Copyright 2015 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.core.blob;
020
021import static java.util.stream.Collectors.toList;
022import static org.nuxeo.ecm.core.model.Session.PROP_ALLOW_DELETE_UNDELETABLE_DOCUMENTS;
023
024import java.time.Instant;
025import java.time.format.DateTimeParseException;
026import java.util.ArrayList;
027import java.util.Arrays;
028import java.util.Calendar;
029import java.util.Collection;
030import java.util.Collections;
031import java.util.HashSet;
032import java.util.List;
033import java.util.Map;
034import java.util.Map.Entry;
035import java.util.Objects;
036import java.util.Set;
037import java.util.function.IntPredicate;
038import java.util.regex.Matcher;
039import java.util.regex.Pattern;
040
041import org.apache.commons.logging.Log;
042import org.apache.commons.logging.LogFactory;
043import org.nuxeo.ecm.core.api.Blob;
044import org.nuxeo.ecm.core.api.DocumentSecurityException;
045import org.nuxeo.ecm.core.api.model.PropertyNotFoundException;
046import org.nuxeo.ecm.core.api.repository.RepositoryManager;
047import org.nuxeo.ecm.core.model.Document;
048import org.nuxeo.ecm.core.model.Document.BlobAccessor;
049import org.nuxeo.runtime.api.Framework;
050
051/**
052 * Default blob dispatcher, that uses the repository name as the blob provider.
053 * <p>
054 * Alternatively, it can be configured through properties to dispatch to a blob provider based on document properties
055 * instead of the repository name.
056 * <p>
057 * The property name is a list of comma-separated clauses, with each clause consisting of a property, an operator and a
058 * value. The property can be a {@link Document} xpath, {@code ecm:repositoryName}, {@code ecm:path}, or, to match the
059 * current blob being dispatched, {@code blob:name}, {@code blob:mime-type}, {@code blob:encoding}, {@code blob:digest},
060 * {@code blob:length} or {@code blob:xpath}.
061 * <p>
062 * Comma-separated clauses are ANDed together. The special name {@code default} defines the default provider, and must
063 * be present.
064 * <p>
065 * Available operators between property and value are =, !=, &lt;, &lt;= ,&gt;, &gt;=, ~ and ^.
066 * <p>
067 * The operators =, !=, &lt;, &lt;=, &gt; and &gt;= work as numeric operators if the property is numeric, otherwise as
068 * string comparisons operators.
069 * <p>
070 * The operator ~ does glob matching using {@code ?} to match a single arbitrary character, and {@code *} to match any
071 * number of characters (including none). The operator ^ does full regexp matching.
072 * <p>
073 * For example, to dispatch to the "first" provider if dc:format is "video", to the "second" provider if the blob's MIME
074 * type is "video/mp4", to the "third" provider if the blob is stored as a secondary attached file, to the "fourth"
075 * provider if the lifecycle state is "approved", to the "fifth" provider if the blob's document is stored in under an
076 * "images" folder, and the document is in the default repository, and otherwise to the "other" provider:
077 *
078 * <pre>
079 * {@code
080 * <property name="dc:format=video">first</property>
081 * <property name="blob:mime-type=video/mp4">second</property>
082 * <property name="blob:xpath~files/*&#47;file">third</property>
083 * <property name="ecm:repositoryName=default,ecm:lifeCycleState=approved">fourth</property>
084 * <property name="ecm:path^.*&#47images&#47.*">fifth</property>
085 * <property name="default">other</property>
086 * }
087 * </pre>
088 * <p>
089 * You can make use of a record blob provider by using:
090 *
091 * <pre>
092 * {@code
093 * <property name="records">records</property>
094 * <property name="default">other</property>
095 * }
096 * </pre>
097 *
098 * @since 7.3
099 */
100public class DefaultBlobDispatcher implements BlobDispatcher {
101
102    private static final Log log = LogFactory.getLog(DefaultBlobDispatcher.class);
103
104    protected static final String NAME_DEFAULT = "default";
105
106    protected static final String NAME_RECORDS = "records";
107
108    // this is a low-level xpath, without schema prefix
109    protected static final String MAIN_BLOB_XPATH = "content";
110
111    // name="records" is equivalent to the following clause:
112    protected static final String RECORDS_CLAUSE = "ecm:isRecord=true,blob:xpath=" + MAIN_BLOB_XPATH;
113
114    protected static final Pattern NAME_PATTERN = Pattern.compile("(.*?)(=|!=|<=|<|>=|>|~|\\^)(.*)");
115
116    /** Pseudo-property for the repository name. */
117    protected static final String REPOSITORY_NAME = "ecm:repositoryName";
118
119    /** Pseudo-property for the document path. */
120    protected static final String PATH = "ecm:path";
121
122    /**
123     * Pseudo-property for the record state.
124     *
125     * @since 11.1
126     */
127    protected static final String IS_RECORD = "ecm:isRecord";
128
129    protected static final String BLOB_PREFIX = "blob:";
130
131    protected static final String BLOB_NAME = "name";
132
133    protected static final String BLOB_MIME_TYPE = "mime-type";
134
135    protected static final String BLOB_ENCODING = "encoding";
136
137    protected static final String BLOB_DIGEST = "digest";
138
139    protected static final String BLOB_LENGTH = "length";
140
141    protected static final String BLOB_XPATH = "xpath";
142
143    protected enum Op {
144        EQ, NEQ, LT, LTE, GT, GTE, GLOB, RE;
145    }
146
147    protected static class Clause {
148        public final String xpath;
149
150        public final Op op;
151
152        public final Object value;
153
154        public Clause(String xpath, Op op, Object value) {
155            this.xpath = xpath;
156            this.op = op;
157            this.value = value;
158        }
159    }
160
161    protected static class Rule {
162        public final List<Clause> clauses;
163
164        public final String providerId;
165
166        public Rule(List<Clause> clauses, String providerId) {
167            this.clauses = clauses;
168            this.providerId = providerId;
169        }
170    }
171
172    // default to true when initialize is not called (default instance)
173    protected boolean useRepositoryName = true;
174
175    protected List<Rule> rules;
176
177    protected Set<String> rulesXPaths;
178
179    protected Set<String> providerIds;
180
181    protected List<String> repositoryNames;
182
183    protected String defaultProviderId;
184
185    @Override
186    public void initialize(Map<String, String> properties) {
187        providerIds = new HashSet<>();
188        rulesXPaths = new HashSet<>();
189        rules = new ArrayList<>();
190        for (Entry<String, String> en : properties.entrySet()) {
191            String clausesString = en.getKey();
192            String providerId = en.getValue();
193            providerIds.add(providerId);
194            if (clausesString.equals(NAME_RECORDS)) {
195                clausesString = RECORDS_CLAUSE;
196            }
197            if (clausesString.equals(NAME_DEFAULT)) {
198                defaultProviderId = providerId;
199            } else {
200                List<Clause> clauses = Arrays.stream(clausesString.split(","))
201                                             .map(this::getClause)
202                                             .filter(Objects::nonNull)
203                                             .collect(toList());
204                if (!clauses.isEmpty()) {
205                    rules.add(new Rule(clauses, providerId));
206                    clauses.forEach(clause -> rulesXPaths.add(clause.xpath));
207                }
208            }
209        }
210        useRepositoryName = providerIds.isEmpty();
211        if (!useRepositoryName && defaultProviderId == null) {
212            log.error("Invalid dispatcher configuration, missing default, configuration will be ignored");
213            useRepositoryName = true;
214        }
215    }
216
217    protected Clause getClause(String name) {
218        Matcher m = NAME_PATTERN.matcher(name);
219        if (m.matches()) {
220            String xpath = m.group(1);
221            String ops = m.group(2);
222            Object value = m.group(3);
223            Op op;
224            switch (ops) {
225            case "=":
226                op = Op.EQ;
227                break;
228            case "!=":
229                op = Op.NEQ;
230                break;
231            case "<":
232                op = Op.LT;
233                break;
234            case "<=":
235                op = Op.LTE;
236                break;
237            case ">":
238                op = Op.GT;
239                break;
240            case ">=":
241                op = Op.GTE;
242                break;
243            case "~":
244                op = Op.GLOB;
245                value = getPatternFromGlob((String) value);
246                break;
247            case "^":
248                op = Op.RE;
249                value = Pattern.compile((String) value);
250                break;
251            default:
252                log.error("Invalid dispatcher configuration operator: " + ops);
253                return null;
254            }
255            return new Clause(xpath, op, value);
256        } else {
257            log.error("Invalid dispatcher configuration property name: " + name);
258            return null;
259        }
260    }
261
262    protected Pattern getPatternFromGlob(String glob) {
263        // this relies on the fact that Pattern.quote wraps everything between \Q and \E
264        // so we "open" the quoting to insert the corresponding regex for * and ?
265        String regex = Pattern.quote(glob).replace("?", "\\E.\\Q").replace("*", "\\E.*\\Q");
266        return Pattern.compile(regex);
267    }
268
269    @Override
270    public Collection<String> getBlobProviderIds() {
271        if (useRepositoryName) {
272            if (repositoryNames == null) {
273                repositoryNames = Framework.getService(RepositoryManager.class).getRepositoryNames();
274            }
275            return repositoryNames;
276        }
277        return providerIds;
278    }
279
280    protected String getProviderId(Document doc, Blob blob, String blobXPath) {
281        if (useRepositoryName) {
282            return doc.getRepositoryName();
283        }
284        NEXT_RULE: //
285        for (Rule rule : rules) {
286            for (Clause clause : rule.clauses) {
287                Object value;
288                try {
289                    value = getValue(doc, blob, blobXPath, clause);
290                } catch (PropertyNotFoundException e) {
291                    continue NEXT_RULE;
292                }
293                value = convert(value);
294                if (!match(value, clause)) {
295                    continue NEXT_RULE;
296                }
297            }
298            return rule.providerId;
299        }
300        return defaultProviderId;
301    }
302
303    protected Object getValue(Document doc, Blob blob, String blobXPath, Clause clause) {
304        String xpath = clause.xpath;
305        if (xpath.equals(REPOSITORY_NAME)) {
306            return doc.getRepositoryName();
307        }
308        if (xpath.equals(PATH)) {
309            return doc.getPath();
310        }
311        if (xpath.equals(IS_RECORD)) {
312            return doc.isRecord();
313        }
314        if (xpath.startsWith(BLOB_PREFIX)) {
315            switch (xpath.substring(BLOB_PREFIX.length())) {
316            case BLOB_NAME:
317                return blob.getFilename();
318            case BLOB_MIME_TYPE:
319                return blob.getMimeType();
320            case BLOB_ENCODING:
321                return blob.getEncoding();
322            case BLOB_DIGEST:
323                return blob.getDigest();
324            case BLOB_LENGTH:
325                return blob.getLength();
326            case BLOB_XPATH:
327                return blobXPath;
328            default:
329                log.error("Invalid dispatcher configuration property name: " + xpath);
330                throw new PropertyNotFoundException(xpath);
331            }
332        }
333        try {
334            return doc.getValue(xpath);
335        } catch (PropertyNotFoundException e) {
336            return doc.getPropertyValue(xpath); // may still throw PropertyNotFoundException
337        }
338    }
339
340    protected Object convert(Object value) {
341        if (value instanceof Calendar) {
342            value = ((Calendar) value).toInstant();
343        }
344        return value;
345    }
346
347    protected boolean match(Object value, Clause clause) {
348        switch (clause.op) {
349        case EQ:
350            return compare(value, clause, true, cmp -> cmp == 0);
351        case NEQ:
352            return compare(value, clause, true, cmp -> cmp != 0);
353        case LT:
354            return compare(value, clause, false, cmp -> cmp < 0);
355        case LTE:
356            return compare(value, clause, false, cmp -> cmp <= 0);
357        case GT:
358            return compare(value, clause, false, cmp -> cmp > 0);
359        case GTE:
360            return compare(value, clause, false, cmp -> cmp >= 0);
361        case GLOB:
362        case RE:
363            return ((Pattern) clause.value).matcher(String.valueOf(value)).matches();
364        default:
365            throw new AssertionError("notreached");
366        }
367    }
368
369    protected boolean compare(Object a, Clause clause, boolean eqneq, IntPredicate predicate) {
370        String b = (String) clause.value;
371        int cmp;
372        if (a == null) {
373            if (eqneq) {
374                // treat null as the string "null" (backward compat)
375                cmp = "null".compareTo(b);
376            } else {
377                // for <, >, etc. try to treat null as 0
378                try {
379                    // try Long
380                    cmp = Long.valueOf(0).compareTo(Long.valueOf(b));
381                } catch (NumberFormatException e) {
382                    try {
383                        // try Double
384                        cmp = Double.valueOf(0).compareTo(Double.valueOf(b));
385                    } catch (NumberFormatException e2) {
386                        // else treat null as empty string
387                        cmp = "".compareTo(b);
388                    }
389                }
390            }
391        } else {
392            if (a instanceof Long) {
393                try {
394                    cmp = ((Long) a).compareTo(Long.valueOf(b));
395                } catch (NumberFormatException e) {
396                    if (!eqneq) {
397                        return false; // no match
398                    }
399                    cmp = 1; // different
400                }
401            } else if (a instanceof Double) {
402                try {
403                    cmp = ((Double) a).compareTo(Double.valueOf(b));
404                } catch (NumberFormatException e) {
405                    if (!eqneq) {
406                        return false; // no match
407                    }
408                    cmp = 1; // different
409                }
410            } else if (a instanceof Instant) {
411                try {
412                    cmp = ((Instant) a).compareTo(Instant.parse(b));
413                } catch (DateTimeParseException e) {
414                    if (!eqneq) {
415                        return false; // no match
416                    }
417                    cmp = 1; // different
418                }
419            } else {
420                cmp = String.valueOf(a).compareTo(b);
421            }
422        }
423        return predicate.test(cmp);
424    }
425
426    @Override
427    public String getBlobProvider(String repositoryName) {
428        if (useRepositoryName) {
429            return repositoryName;
430        }
431        // useful for legacy blobs created without prefix before dispatch was configured
432        return defaultProviderId;
433    }
434
435    @Override
436    public BlobDispatch getBlobProvider(Document doc, Blob blob, String xpath) {
437        if (useRepositoryName) {
438            String providerId = doc.getRepositoryName();
439            return new BlobDispatch(providerId, false);
440        }
441        String providerId = getProviderId(doc, blob, xpath);
442        return new BlobDispatch(providerId, true);
443    }
444
445    @Override
446    public void notifyChanges(Document doc, Set<String> xpaths) {
447        if (useRepositoryName) {
448            return;
449        }
450        for (String xpath : rulesXPaths) {
451            if (xpaths.contains(xpath)) {
452                doc.visitBlobs(accessor -> checkBlob(doc, accessor));
453                return;
454            }
455        }
456    }
457
458    /**
459     * Checks if the blob is stored in the expected blob provider to which it's supposed to be dispatched. If not,
460     * store it in the correct one (and maybe remove it from the previous one if it makes sense).
461     */
462    protected void checkBlob(Document doc, BlobAccessor accessor) {
463        Blob blob = accessor.getBlob();
464        if (!(blob instanceof ManagedBlob)) {
465            return;
466        }
467        String xpath = accessor.getXPath();
468        // compare current provider with expected
469        ManagedBlob managedBlob = (ManagedBlob) blob;
470        String previousProviderId = managedBlob.getProviderId();
471        String expectedProviderId = getProviderId(doc, blob, xpath);
472        if (previousProviderId.equals(expectedProviderId)) {
473            return;
474        }
475        // re-dispatch blob to new blob provider
476        // this calls back into blobProvider.writeBlob for the expected blob provider
477        accessor.setBlob(blob);
478        // if old blob provider is in record mode, delete from it
479        deleteBlobIfRecord(previousProviderId, doc, xpath);
480    }
481
482    @Override
483    public void notifyMakeRecord(Document doc) {
484        notifyChanges(doc, Collections.singleton(IS_RECORD));
485    }
486
487    @Override
488    public void notifyAfterCopy(Document doc) {
489        notifyChanges(doc, Collections.singleton(IS_RECORD));
490    }
491
492    // TODO move this to caller
493
494    @Override
495    public void notifyBeforeRemove(Document doc) {
496        String xpath = MAIN_BLOB_XPATH;
497        Blob blob;
498        try {
499            blob = (Blob) doc.getValue(xpath);
500        } catch (PropertyNotFoundException e) {
501            return;
502        }
503        if (!(blob instanceof ManagedBlob)) {
504            return;
505        }
506        String blobProviderId = ((ManagedBlob) blob).getProviderId();
507        deleteBlobIfRecord(blobProviderId, doc, xpath);
508    }
509
510    protected void deleteBlobIfRecord(String blobProviderId, Document doc, String xpath) {
511        BlobProvider blobProvider = Framework.getService(BlobManager.class).getBlobProvider(blobProviderId);
512        if (blobProvider != null && blobProvider.isRecordMode()) {
513            checkBlobCanBeDeleted(doc, xpath);
514            blobProvider.deleteBlob(new BlobContext(doc, xpath));
515        }
516    }
517
518    protected void checkBlobCanBeDeleted(Document doc, String xpath) {
519        if (MAIN_BLOB_XPATH.equals(xpath) && doc.isUnderRetentionOrLegalHold()) {
520            boolean allowDeleteUndeletable = Framework.isBooleanPropertyTrue(PROP_ALLOW_DELETE_UNDELETABLE_DOCUMENTS);
521            if (allowDeleteUndeletable) {
522                // in unit tests allow this
523                return;
524            }
525            throw new DocumentSecurityException(
526                    "Cannot remove main blob from document " + doc.getUUID() + ", it is under retention / hold");
527        }
528    }
529
530}