001/*
002 * (C) Copyright 2006-2010 Nuxeo SA (http://nuxeo.com/) and contributors.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the GNU Lesser General Public License
006 * (LGPL) version 2.1 which accompanies this distribution, and is available at
007 * http://www.gnu.org/licenses/lgpl.html
008 *
009 * This library is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012 * Lesser General Public License for more details.
013 *
014 * Contributors:
015 *     Florent Guillaume
016 */
017package org.nuxeo.ecm.platform.htmlsanitizer;
018
019import java.io.InputStream;
020import java.io.Serializable;
021import java.util.ArrayList;
022import java.util.Iterator;
023import java.util.LinkedList;
024import java.util.List;
025
026import org.apache.commons.logging.Log;
027import org.apache.commons.logging.LogFactory;
028import org.nuxeo.ecm.core.api.DocumentModel;
029import org.nuxeo.ecm.core.api.model.Property;
030import org.nuxeo.ecm.core.api.model.PropertyNotFoundException;
031import org.nuxeo.runtime.model.ComponentInstance;
032import org.nuxeo.runtime.model.DefaultComponent;
033import org.owasp.validator.html.AntiSamy;
034import org.owasp.validator.html.CleanResults;
035import org.owasp.validator.html.Policy;
036import org.owasp.validator.html.PolicyException;
037import org.owasp.validator.html.ScanException;
038
039/**
040 * Service that sanitizes some HMTL fields to remove potential cross-site scripting attacks in them.
041 */
042public class HtmlSanitizerServiceImpl extends DefaultComponent implements HtmlSanitizerService {
043
044    private static final Log log = LogFactory.getLog(HtmlSanitizerServiceImpl.class);
045
046    public static final String ANTISAMY_XP = "antisamy";
047
048    public static final String SANITIZER_XP = "sanitizer";
049
050    /** All policies registered. */
051    public LinkedList<HtmlSanitizerAntiSamyDescriptor> allPolicies = new LinkedList<HtmlSanitizerAntiSamyDescriptor>();
052
053    /** Effective policy. */
054    public Policy policy;
055
056    /** All sanitizers registered. */
057    public List<HtmlSanitizerDescriptor> allSanitizers = new ArrayList<HtmlSanitizerDescriptor>(1);
058
059    /** Effective sanitizers. */
060    public List<HtmlSanitizerDescriptor> sanitizers = new ArrayList<HtmlSanitizerDescriptor>(1);
061
062    @Override
063    public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
064        if (ANTISAMY_XP.equals(extensionPoint)) {
065            if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) {
066                log.error("Contribution " + contribution + " is not of type "
067                        + HtmlSanitizerAntiSamyDescriptor.class.getName());
068                return;
069            }
070            HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution;
071            log.info("Registering AntiSamy policy: " + desc.policy);
072            addAntiSamy(desc);
073        } else if (SANITIZER_XP.equals(extensionPoint)) {
074            if (!(contribution instanceof HtmlSanitizerDescriptor)) {
075                log.error("Contribution " + contribution + " is not of type " + HtmlSanitizerDescriptor.class.getName());
076                return;
077            }
078            HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution;
079            log.info("Registering HTML sanitizer: " + desc);
080            addSanitizer(desc);
081        } else {
082            log.error("Contribution extension point should be '" + SANITIZER_XP + "' but is: " + extensionPoint);
083        }
084    }
085
086    @Override
087    public void unregisterContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
088        if (ANTISAMY_XP.equals(extensionPoint)) {
089            if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) {
090                return;
091            }
092            HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution;
093            log.info("Unregistering AntiSamy policy: " + desc.policy);
094            removeAntiSamy(desc);
095        } else if (SANITIZER_XP.equals(extensionPoint)) {
096            if (!(contribution instanceof HtmlSanitizerDescriptor)) {
097                return;
098            }
099            HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution;
100            log.info("Unregistering HTML sanitizer: " + desc);
101            removeSanitizer(desc);
102        }
103    }
104
105    protected void addAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) {
106        if (Thread.currentThread().getContextClassLoader().getResourceAsStream(desc.policy) == null) {
107            log.error("Cannot find AntiSamy policy: " + desc.policy);
108            return;
109        }
110        allPolicies.add(desc);
111        refreshPolicy();
112    }
113
114    protected void removeAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) {
115        allPolicies.remove(desc);
116        refreshPolicy();
117    }
118
119    protected void refreshPolicy() {
120        if (allPolicies.isEmpty()) {
121            policy = null;
122        } else {
123            HtmlSanitizerAntiSamyDescriptor desc = allPolicies.removeLast();
124            InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(desc.policy);
125            try {
126                policy = Policy.getInstance(is);
127            } catch (PolicyException e) {
128                policy = null;
129                throw new RuntimeException("Cannot parse AntiSamy policy: " + desc.policy, e);
130            }
131        }
132    }
133
134    protected Policy getPolicy() {
135        return policy;
136    }
137
138    protected void addSanitizer(HtmlSanitizerDescriptor desc) {
139        if (desc.fields.isEmpty()) {
140            log.error("Sanitizer has no fields: " + desc);
141            return;
142        }
143        allSanitizers.add(desc);
144        refreshSanitizers();
145    }
146
147    protected void removeSanitizer(HtmlSanitizerDescriptor desc) {
148        allSanitizers.remove(desc);
149        refreshSanitizers();
150    }
151
152    protected void refreshSanitizers() {
153        // not very efficient algorithm but who cares?
154        sanitizers.clear();
155        for (HtmlSanitizerDescriptor sanitizer : allSanitizers) {
156            // remove existing with same name
157            for (Iterator<HtmlSanitizerDescriptor> it = sanitizers.iterator(); it.hasNext();) {
158                HtmlSanitizerDescriptor s = it.next();
159                if (s.name.equals(sanitizer.name)) {
160                    it.remove();
161                    break;
162                }
163            }
164            // add new one if enabled
165            if (sanitizer.enabled) {
166                sanitizers.add(sanitizer);
167            }
168        }
169    }
170
171    protected List<HtmlSanitizerDescriptor> getSanitizers() {
172        return sanitizers;
173    }
174
175    // ----- HtmlSanitizerService -----
176
177    @Override
178    public void sanitizeDocument(DocumentModel doc) {
179        if (policy == null) {
180            log.error("Cannot sanitize, no policy registered");
181            return;
182        }
183        for (HtmlSanitizerDescriptor sanitizer : sanitizers) {
184            if (!sanitizer.types.isEmpty() && !sanitizer.types.contains(doc.getType())) {
185                continue;
186            }
187            for (FieldDescriptor field : sanitizer.fields) {
188                String fieldName = field.getContentField();
189                String filterField = field.getFilterField();
190                if (filterField != null) {
191                    Property filterProp;
192                    try {
193                        filterProp = doc.getProperty(filterField);
194                    } catch (PropertyNotFoundException e) {
195                        continue;
196                    }
197                    if (field.match(String.valueOf(filterProp.getValue())) != field.doSanitize()) {
198                        continue;
199                    }
200                }
201                Property prop;
202                try {
203                    prop = doc.getProperty(fieldName);
204                } catch (PropertyNotFoundException e) {
205                    continue;
206                }
207                Serializable value = prop.getValue();
208                if (value == null) {
209                    continue;
210                }
211                if (!(value instanceof String)) {
212                    log.debug("Cannot sanitize non-string field: " + field);
213                    continue;
214                }
215                String info = "doc " + doc.getPathAsString() + " (" + doc.getId() + ") field " + field;
216                String newValue = sanitizeString((String) value, info);
217                if (!newValue.equals(value)) {
218                    prop.setValue(newValue);
219                }
220            }
221        }
222    }
223
224    @Override
225    public String sanitizeString(String string, String info) {
226        if (policy == null) {
227            log.error("Cannot sanitize, no policy registered");
228            return string;
229        }
230        try {
231            CleanResults cr = new AntiSamy().scan(string, policy);
232            for (Object err : cr.getErrorMessages()) {
233                log.debug(String.format("Sanitizing %s: %s", info == null ? "" : info, err));
234            }
235            return cr.getCleanHTML();
236        } catch (ScanException | PolicyException e) {
237            log.error(String.format("Cannot sanitize %s: %s", info == null ? "" : info, e));
238            return string;
239        }
240    }
241
242}