001/*
002 * (C) Copyright 2006-2010 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.platform.htmlsanitizer;
020
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.model.Property;
import org.nuxeo.ecm.core.api.model.PropertyNotFoundException;
import org.nuxeo.runtime.model.ComponentInstance;
import org.nuxeo.runtime.model.DefaultComponent;
import org.owasp.validator.html.AntiSamy;
import org.owasp.validator.html.CleanResults;
import org.owasp.validator.html.Policy;
import org.owasp.validator.html.PolicyException;
import org.owasp.validator.html.ScanException;
040
041/**
042 * Service that sanitizes some HMTL fields to remove potential cross-site scripting attacks in them.
043 */
044public class HtmlSanitizerServiceImpl extends DefaultComponent implements HtmlSanitizerService {
045
046    private static final Log log = LogFactory.getLog(HtmlSanitizerServiceImpl.class);
047
048    public static final String ANTISAMY_XP = "antisamy";
049
050    public static final String SANITIZER_XP = "sanitizer";
051
052    /** All policies registered. */
053    public LinkedList<HtmlSanitizerAntiSamyDescriptor> allPolicies = new LinkedList<HtmlSanitizerAntiSamyDescriptor>();
054
055    /** Effective policy. */
056    public Policy policy;
057
058    /** All sanitizers registered. */
059    public List<HtmlSanitizerDescriptor> allSanitizers = new ArrayList<HtmlSanitizerDescriptor>(1);
060
061    /** Effective sanitizers. */
062    public List<HtmlSanitizerDescriptor> sanitizers = new ArrayList<HtmlSanitizerDescriptor>(1);
063
064    @Override
065    public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
066        if (ANTISAMY_XP.equals(extensionPoint)) {
067            if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) {
068                log.error("Contribution " + contribution + " is not of type "
069                        + HtmlSanitizerAntiSamyDescriptor.class.getName());
070                return;
071            }
072            HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution;
073            log.info("Registering AntiSamy policy: " + desc.policy);
074            addAntiSamy(desc);
075        } else if (SANITIZER_XP.equals(extensionPoint)) {
076            if (!(contribution instanceof HtmlSanitizerDescriptor)) {
077                log.error("Contribution " + contribution + " is not of type " + HtmlSanitizerDescriptor.class.getName());
078                return;
079            }
080            HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution;
081            log.info("Registering HTML sanitizer: " + desc);
082            addSanitizer(desc);
083        } else {
084            log.error("Contribution extension point should be '" + SANITIZER_XP + "' but is: " + extensionPoint);
085        }
086    }
087
088    @Override
089    public void unregisterContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
090        if (ANTISAMY_XP.equals(extensionPoint)) {
091            if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) {
092                return;
093            }
094            HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution;
095            log.info("Unregistering AntiSamy policy: " + desc.policy);
096            removeAntiSamy(desc);
097        } else if (SANITIZER_XP.equals(extensionPoint)) {
098            if (!(contribution instanceof HtmlSanitizerDescriptor)) {
099                return;
100            }
101            HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution;
102            log.info("Unregistering HTML sanitizer: " + desc);
103            removeSanitizer(desc);
104        }
105    }
106
107    protected void addAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) {
108        if (Thread.currentThread().getContextClassLoader().getResourceAsStream(desc.policy) == null) {
109            log.error("Cannot find AntiSamy policy: " + desc.policy);
110            return;
111        }
112        allPolicies.add(desc);
113        refreshPolicy();
114    }
115
116    protected void removeAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) {
117        allPolicies.remove(desc);
118        refreshPolicy();
119    }
120
121    protected void refreshPolicy() {
122        if (allPolicies.isEmpty()) {
123            policy = null;
124        } else {
125            HtmlSanitizerAntiSamyDescriptor desc = allPolicies.removeLast();
126            InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(desc.policy);
127            try {
128                policy = Policy.getInstance(is);
129            } catch (PolicyException e) {
130                policy = null;
131                throw new RuntimeException("Cannot parse AntiSamy policy: " + desc.policy, e);
132            }
133        }
134    }
135
136    protected Policy getPolicy() {
137        return policy;
138    }
139
140    protected void addSanitizer(HtmlSanitizerDescriptor desc) {
141        if (desc.fields.isEmpty()) {
142            log.error("Sanitizer has no fields: " + desc);
143            return;
144        }
145        allSanitizers.add(desc);
146        refreshSanitizers();
147    }
148
149    protected void removeSanitizer(HtmlSanitizerDescriptor desc) {
150        allSanitizers.remove(desc);
151        refreshSanitizers();
152    }
153
154    protected void refreshSanitizers() {
155        // not very efficient algorithm but who cares?
156        sanitizers.clear();
157        for (HtmlSanitizerDescriptor sanitizer : allSanitizers) {
158            // remove existing with same name
159            for (Iterator<HtmlSanitizerDescriptor> it = sanitizers.iterator(); it.hasNext();) {
160                HtmlSanitizerDescriptor s = it.next();
161                if (s.name.equals(sanitizer.name)) {
162                    it.remove();
163                    break;
164                }
165            }
166            // add new one if enabled
167            if (sanitizer.enabled) {
168                sanitizers.add(sanitizer);
169            }
170        }
171    }
172
173    protected List<HtmlSanitizerDescriptor> getSanitizers() {
174        return sanitizers;
175    }
176
177    // ----- HtmlSanitizerService -----
178
179    @Override
180    public void sanitizeDocument(DocumentModel doc) {
181        if (policy == null) {
182            log.error("Cannot sanitize, no policy registered");
183            return;
184        }
185        for (HtmlSanitizerDescriptor sanitizer : sanitizers) {
186            if (!sanitizer.types.isEmpty() && !sanitizer.types.contains(doc.getType())) {
187                continue;
188            }
189            for (FieldDescriptor field : sanitizer.fields) {
190                String fieldName = field.getContentField();
191                String filterField = field.getFilterField();
192                if (filterField != null) {
193                    Property filterProp;
194                    try {
195                        filterProp = doc.getProperty(filterField);
196                    } catch (PropertyNotFoundException e) {
197                        continue;
198                    }
199                    if (field.match(String.valueOf(filterProp.getValue())) != field.doSanitize()) {
200                        continue;
201                    }
202                }
203                Property prop;
204                try {
205                    prop = doc.getProperty(fieldName);
206                } catch (PropertyNotFoundException e) {
207                    continue;
208                }
209                Serializable value = prop.getValue();
210                if (value == null) {
211                    continue;
212                }
213                if (!(value instanceof String)) {
214                    log.debug("Cannot sanitize non-string field: " + field);
215                    continue;
216                }
217                String info = "doc " + doc.getPathAsString() + " (" + doc.getId() + ") field " + field;
218                String newValue = sanitizeString((String) value, info);
219                if (!newValue.equals(value)) {
220                    prop.setValue(newValue);
221                }
222            }
223        }
224    }
225
226    @Override
227    public String sanitizeString(String string, String info) {
228        if (policy == null) {
229            log.error("Cannot sanitize, no policy registered");
230            return string;
231        }
232        try {
233            CleanResults cr = new AntiSamy().scan(string, policy);
234            for (Object err : cr.getErrorMessages()) {
235                log.debug(String.format("Sanitizing %s: %s", info == null ? "" : info, err));
236            }
237            return cr.getCleanHTML();
238        } catch (ScanException | PolicyException e) {
239            log.error(String.format("Cannot sanitize %s: %s", info == null ? "" : info, e));
240            return string;
241        }
242    }
243
244}