001/*
002 * (C) Copyright 2002-2015 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 */
019
020package org.nuxeo.ecm.platform.importer.xml.parser;
021
022import java.io.File;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.Serializable;
026import java.util.ArrayList;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030import java.util.Stack;
031
032import org.apache.commons.io.FileUtils;
033import org.apache.commons.logging.Log;
034import org.apache.commons.logging.LogFactory;
035import org.dom4j.Attribute;
036import org.dom4j.Document;
037import org.dom4j.DocumentException;
038import org.dom4j.Element;
039import org.dom4j.InvalidXPathException;
040import org.dom4j.Node;
041import org.dom4j.Text;
042import org.dom4j.io.SAXReader;
043import org.dom4j.tree.DefaultText;
044import org.mvel2.MVEL;
045import org.nuxeo.common.Environment;
046import org.nuxeo.common.utils.ExceptionUtils;
047import org.nuxeo.common.utils.ZipUtils;
048import org.nuxeo.ecm.automation.AutomationService;
049import org.nuxeo.ecm.automation.OperationContext;
050import org.nuxeo.ecm.core.api.Blob;
051import org.nuxeo.ecm.core.api.Blobs;
052import org.nuxeo.ecm.core.api.CoreSession;
053import org.nuxeo.ecm.core.api.DocumentModel;
054import org.nuxeo.ecm.core.api.NuxeoException;
055import org.nuxeo.ecm.core.api.model.Property;
056import org.nuxeo.ecm.core.api.model.impl.primitives.BlobProperty;
057import org.nuxeo.ecm.core.schema.types.ListType;
058import org.nuxeo.runtime.api.Framework;
059
060/**
061 * Main implementation class for delivering the Import logic
062 *
063 * @author <a href="mailto:tdelprat@nuxeo.com">Tiry</a>
064 */
065public class XMLImporterServiceImpl {
066
067    protected static final String FILE_PROPERTY = "file";
068
069    protected static final String CONTENT_PROPERTY = "content";
070
071    protected static final String MIME_TYPE_PROPERTY = "mimetype";
072
073    protected static final String FILE_NAME_PROPERTY = "filename";
074
075    private static final String MSG_NO_ELEMENT_FOUND = "**CREATION**\n"
076            + "No element \"%s\" found in %s, use the DOC_TYPE-INDEX value";
077
078    private static final String MSG_CREATION = "**CREATION**\n"
079            + "Try to create document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n";
080
081    private static final String MSG_UPDATE = "**DOCUMENT UPDATE**\n"
082            + "Try to update document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n";
083
084    private static final String MSG_UPDATE_PROPERTY_TRACE = "**PROPERTY UPDATE**\n"
085            + "Value found for %s in %s is \"%s\". With the following conf: %s";
086
087    private static final String MSG_UPDATE_PROPERTY = "**PROPERTY UPDATE**\n"
088            + "Try to set value into %s property based on %s element on document \"%s\" (%s). Conf activated: %s";
089
090    public static final Log log = LogFactory.getLog(XMLImporterServiceImpl.class);
091
092    public static final String XML_IMPORTER_INITIALIZATION = "org.nuxeo.xml.importer.initialization";
093
094    protected CoreSession session;
095
096    protected DocumentModel rootDoc;
097
098    protected Stack<DocumentModel> docsStack;
099
100    protected Map<String, List<String>> deletedAttributes = new HashMap<>();
101
102    protected Map<String, Object> mvelCtx = new HashMap<>();
103
104    protected Map<Element, DocumentModel> elToDoc = new HashMap<>();
105
106    protected ParserConfigRegistry registry;
107
108    protected Boolean deferSave = false;
109
110    public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry) {
111        this(rootDoc, registry, null, false);
112    }
113
114    public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry, Map<String, Object> mvelContext,
115            boolean deferSave) {
116        if (mvelContext != null) {
117            mvelCtx.putAll(mvelContext);
118        }
119
120        session = rootDoc.getCoreSession();
121        this.rootDoc = rootDoc;
122        this.deferSave = deferSave;
123
124        docsStack = new Stack<>();
125        pushInStack(rootDoc);
126        mvelCtx.put("root", rootDoc);
127        mvelCtx.put("docs", docsStack);
128        mvelCtx.put("session", session);
129
130        this.registry = registry;
131    }
132
133    protected ParserConfigRegistry getRegistry() {
134        return registry;
135    }
136
137    protected DocConfigDescriptor getDocCreationConfig(Element el) {
138        for (DocConfigDescriptor conf : getRegistry().getDocCreationConfigs()) {
139            // direct tagName match
140            if (conf.getTagName().equals(el.getName())) {
141                return conf;
142            } else {
143                // try xpath match
144                try {
145                    if (el.matches(conf.getTagName())) {
146                        return conf;
147                    }
148                } catch (InvalidXPathException e) {
149                    // NOP
150                }
151            }
152        }
153        return null;
154    }
155
156    protected List<AttributeConfigDescriptor> getAttributConfigs(Element el) {
157        List<AttributeConfigDescriptor> result = new ArrayList<>();
158        for (AttributeConfigDescriptor conf : getRegistry().getAttributConfigs()) {
159            if (conf.getTagName().equals(el.getName())) {
160                result.add(conf);
161            } else {
162                // try xpath match
163                try {
164                    if (el.matches(conf.getTagName())) {
165                        result.add(conf);
166                    }
167                } catch (InvalidXPathException e) {
168                    // NOP
169                }
170            }
171        }
172        return result;
173    }
174
175    protected File workingDirectory;
176
177    public List<DocumentModel> parse(InputStream is) throws IOException {
178        mvelCtx.put("source", is);
179        try {
180            Document doc;
181            doc = new SAXReader().read(is);
182            workingDirectory = null;
183            return parse(doc);
184        } catch (DocumentException e) {
185            throw new IOException(e);
186        }
187    }
188
189    public List<DocumentModel> parse(File file) throws IOException {
190        mvelCtx.put("source", file);
191
192        Document doc = null;
193        File directory = null;
194        try {
195            doc = new SAXReader().read(file);
196            workingDirectory = file.getParentFile();
197        } catch (DocumentException e) {
198            File tmp = Environment.getDefault().getTemp();
199            directory = new File(tmp, file.getName() + System.currentTimeMillis());
200            directory.mkdir();
201            ZipUtils.unzip(file, directory);
202            for (File child : directory.listFiles()) {
203                if (child.getName().endsWith(".xml")) {
204                    return parse(child);
205                }
206            }
207            throw new NuxeoException("Can not find XML file inside the zip archive", e);
208        } finally {
209            FileUtils.deleteQuietly(directory);
210        }
211        return parse(doc);
212    }
213
214    public List<DocumentModel> parse(Document doc) {
215        Element root = doc.getRootElement();
216        elToDoc = new HashMap<>();
217        mvelCtx.put("xml", doc);
218        mvelCtx.put("map", elToDoc);
219        process(root);
220
221        // defer saveDocument to end of operation
222        if (deferSave) {
223            ArrayList<DocumentModel> a = new ArrayList<>();
224            DocumentModel d = null;
225            while (docsStack.size() > 0) {
226                d = popStack();
227                d.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE);
228                d = session.saveDocument(d);
229                a.add(d);
230            }
231            return a;
232        } else {
233            return new ArrayList<>(docsStack);
234        }
235    }
236
237    protected Object resolveComplex(Element el, AttributeConfigDescriptor conf) {
238        Map<String, Object> propValue = new HashMap<>();
239        for (String name : conf.getMapping().keySet()) {
240            propValue.put(name, resolveAndEvaluateXmlNode(el, conf.getMapping().get(name)));
241        }
242
243        return propValue;
244    }
245
246    protected Blob resolveBlob(Element el, AttributeConfigDescriptor conf, String propertyName) {
247        @SuppressWarnings("unchecked")
248        Map<String, Object> propValues = (Map<String, Object>) resolveComplex(el, conf);
249
250        if (propValues.containsKey(propertyName)) {
251            try {
252                Blob blob = null;
253                String content = (String) propValues.get(propertyName);
254                if (content != null && workingDirectory != null) {
255                    File file = new File(workingDirectory, content.trim());
256                    if (file.exists()) {
257                        blob = Blobs.createBlob(file);
258                    }
259                }
260                if (blob == null && content != null) {
261                    blob = Blobs.createBlob(content);
262                }
263
264                if (blob != null) {
265                    if (propValues.containsKey(MIME_TYPE_PROPERTY)) {
266                        blob.setMimeType((String) propValues.get(MIME_TYPE_PROPERTY));
267                    }
268                    if (propValues.containsKey(FILE_NAME_PROPERTY)) {
269                        blob.setFilename((String) propValues.get(FILE_NAME_PROPERTY));
270                    }
271                }
272
273                return blob;
274            } catch (IOException e) {
275                throw new RuntimeException(e);
276            }
277        }
278        return null;
279    }
280
281    @SuppressWarnings("unchecked")
282    protected void processDocAttributes(DocumentModel doc, Element el, AttributeConfigDescriptor conf) {
283        String targetDocProperty = conf.getTargetDocProperty();
284
285        if (log.isDebugEnabled()) {
286            log.debug(String.format(MSG_UPDATE_PROPERTY, targetDocProperty, el.getUniquePath(), doc.getPathAsString(),
287                    doc.getType(), conf.toString()));
288        }
289        Property property = doc.getProperty(targetDocProperty);
290
291        if (property.isScalar()) {
292            Object value = resolveAndEvaluateXmlNode(el, conf.getSingleXpath());
293            if (log.isTraceEnabled()) {
294                log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
295                        conf.toString()));
296            }
297            property.setValue(value);
298
299        } else if (property.isComplex()) {
300
301            if (property instanceof BlobProperty) {
302                Object value = resolveBlob(el, conf, CONTENT_PROPERTY);
303                if (log.isTraceEnabled()) {
304                    log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
305                            conf.toString()));
306                }
307                property.setValue(value);
308            } else {
309                Object value = resolveComplex(el, conf);
310                if (log.isTraceEnabled()) {
311                    log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
312                            conf.toString()));
313                }
314                property.setValue(value);
315            }
316
317        } else if (property.isList()) {
318
319            ListType lType = (ListType) property.getType();
320
321            Serializable value;
322
323            if (lType.getFieldType().isSimpleType()) {
324                value = (Serializable) resolveAndEvaluateXmlNode(el, conf.getSingleXpath());
325                if (value != null) {
326                    Object values = property.getValue();
327                    if (values == null) {
328                        property.setValue(new Object[] { value });
329                    } else if (values instanceof Object[]) {
330                        int len = ((Object[]) values).length;
331                        Object[] newValues = new Object[len + 1];
332                        System.arraycopy(values, 0, newValues, 0, len);
333                        newValues[len] = value;
334                        property.setValue(newValues);
335                    } else {
336                        log.error("Simple multi value property " + targetDocProperty + " is not an Array");
337                    }
338                }
339            } else {
340                Map<String, Object> props = (Map<String, Object>) resolveComplex(el, conf);
341                if (props.containsKey(FILE_PROPERTY)) {
342                    Blob blob = resolveBlob(el, conf, FILE_PROPERTY);
343                    props.put(FILE_PROPERTY, blob);
344                }
345                property.addValue(props);
346                value = (Serializable) props;
347            }
348
349            if (log.isTraceEnabled()) {
350                log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
351                        conf.toString()));
352            }
353        }
354    }
355
356    protected Map<String, Object> getMVELContext(Element el) {
357        mvelCtx.put("currentDocument", docsStack.peek());
358        mvelCtx.put("currentElement", el);
359        mvelCtx.put("Fn", new MVELImporterFunction(session, docsStack, elToDoc, el));
360        return mvelCtx;
361    }
362
363    protected Object resolve(Element el, String xpr) {
364        if (xpr == null) {
365            return null;
366        }
367
368        if (xpr.startsWith("#{") && xpr.endsWith("}")) { // MVEL
369            xpr = xpr.substring(2, xpr.length() - 1);
370            return resolveMVEL(el, xpr);
371        } else if (xpr.contains("{{")) { // String containing XPaths
372            StringBuilder sb = new StringBuilder();
373            int idx = xpr.indexOf("{{");
374            while (idx >= 0) {
375                int idx2 = xpr.indexOf("}}", idx);
376                if (idx2 > 0) {
377                    sb.append(xpr.substring(0, idx));
378                    String xpath = xpr.substring(idx + 2, idx2);
379                    sb.append(resolveAndEvaluateXmlNode(el, xpath));
380                    xpr = xpr.substring(idx2);
381                } else {
382                    sb.append(xpr);
383                    xpr = "";
384                }
385                idx = xpr.indexOf("{{");
386            }
387            return sb.toString();
388        } else {
389            return resolveXP(el, xpr); // default to pure XPATH
390        }
391    }
392
393    protected Object resolveMVEL(Element el, String xpr) {
394        Map<String, Object> ctx = new HashMap<>(getMVELContext(el));
395        Serializable compiled = MVEL.compileExpression(xpr);
396        return MVEL.executeExpression(compiled, ctx);
397    }
398
399    protected Object resolveXP(Element el, String xpr) {
400        List<Node> nodes = el.selectNodes(xpr);
401        if (nodes.size() == 1) {
402            return nodes.get(0);
403        } else if (nodes.size() > 1) {
404            // Workaround for NXP-11834
405            if (xpr.endsWith("text()")) {
406                String value = "";
407                for (Node node : nodes) {
408                    if (!(node instanceof DefaultText)) {
409                        String msg = "Text selector must return a string (expr:\"%s\") element %s";
410                        log.error(String.format(msg, xpr, el.getStringValue()));
411                        return value;
412                    }
413                    value += ((DefaultText) node).getText();
414                }
415                return new DefaultText(value);
416            }
417            return nodes;
418        }
419        return null;
420    }
421
422    protected String resolvePath(Element el, String xpr) {
423        Object ob = resolve(el, xpr);
424        if (ob == null) {
425            for (int i = 0; i < docsStack.size(); i++) {
426                if (docsStack.get(i).isFolder()) {
427                    return docsStack.get(i).getPathAsString();
428                }
429            }
430        } else {
431            if (ob instanceof DocumentModel) {
432                return ((DocumentModel) ob).getPathAsString();
433            } else if (ob instanceof Node) {
434                if (ob instanceof Element) {
435                    Element targetElement = (Element) ob;
436                    DocumentModel target = elToDoc.get(targetElement);
437                    if (target != null) {
438                        return target.getPathAsString();
439                    } else {
440                        return targetElement.getText();
441                    }
442                } else if (ob instanceof Attribute) {
443                    return ((Attribute) ob).getValue();
444                } else if (ob instanceof Text) {
445                    return ((Text) ob).getText();
446                } else if (ob.getClass().isAssignableFrom(Attribute.class)) {
447                    return ((Attribute) ob).getValue();
448                }
449            } else {
450                return ob.toString();
451            }
452        }
453        return rootDoc.getPathAsString();
454    }
455
456    protected String resolveName(Element el, String xpr) {
457        Object ob = resolveAndEvaluateXmlNode(el, xpr);
458        if (ob == null) {
459            return null;
460        }
461        return ob.toString();
462    }
463
464    protected Object resolveAndEvaluateXmlNode(Element el, String xpr) {
465        Object ob = resolve(el, xpr);
466        if (ob == null) {
467            return null;
468        }
469        if (ob instanceof Node) {
470            return ((Node) ob).getText();
471        } else {
472            return ob;
473        }
474    }
475
476    protected void createNewDocument(Element el, DocConfigDescriptor conf) {
477        DocumentModel doc = session.createDocumentModel(conf.getDocType());
478
479        String path = resolvePath(el, conf.getParent());
480        Object nameOb = resolveName(el, conf.getName());
481        String name = null;
482        if (nameOb == null) {
483            if (log.isDebugEnabled()) {
484                log.debug(String.format(MSG_NO_ELEMENT_FOUND, conf.getName(), el.getUniquePath()));
485            }
486            int idx = 1;
487            for (int i = 0; i < docsStack.size(); i++) {
488                if (docsStack.get(i).getType().equals(conf.getDocType())) {
489                    idx++;
490                }
491            }
492            name = conf.getDocType() + "-" + idx;
493        } else {
494            name = nameOb.toString();
495        }
496        doc.setPathInfo(path, name);
497
498        if (log.isDebugEnabled()) {
499            if (conf.getUpdate()) {
500                log.debug(String.format(MSG_UPDATE, path, name, el.getUniquePath(), conf.toString()));
501            } else {
502                log.debug(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString()));
503            }
504        }
505
506        try {
507            if (conf.getUpdate() && session.exists(doc.getRef())) {
508                DocumentModel existingDoc = session.getDocument(doc.getRef());
509
510                // get attributes, if attribute needs to be overwritten, empty in the document
511                for (Object e : el.elements()) {
512                    List<AttributeConfigDescriptor> configs = getAttributConfigs((Element) e);
513                    if (configs != null) {
514                        if (!deletedAttributes.containsKey(existingDoc.getId())) {
515                            deletedAttributes.put(existingDoc.getId(), new ArrayList<String>());
516                        }
517                        for (AttributeConfigDescriptor config : configs) {
518                            String targetDocProperty = config.getTargetDocProperty();
519                            // check deletedAttributes for attribute which should be overwritten
520                            // if it is there, don't empty it a second time
521                            if (config.overwrite
522                                    && !deletedAttributes.get(existingDoc.getId()).contains(targetDocProperty)) {
523                                deletedAttributes.get(existingDoc.getId()).add(targetDocProperty);
524                                existingDoc.setPropertyValue(targetDocProperty, new ArrayList<>());
525                            }
526                        }
527                    }
528                }
529                doc = existingDoc;
530            } else {
531                doc = session.createDocument(doc);
532            }
533        } catch (NuxeoException e) {
534            e.addInfo(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString()));
535            throw e;
536        }
537        pushInStack(doc);
538        elToDoc.put(el, doc);
539    }
540
541    protected void process(Element el) {
542        DocConfigDescriptor createConf = getDocCreationConfig(el);
543        if (createConf != null) {
544            createNewDocument(el, createConf);
545        }
546        List<AttributeConfigDescriptor> configs = getAttributConfigs(el);
547        if (configs != null) {
548            for (AttributeConfigDescriptor config : configs) {
549                processDocAttributes(docsStack.peek(), el, config);
550            }
551
552            DocumentModel doc = popStack();
553            doc.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE);
554            if (!deferSave) {
555                doc = session.saveDocument(doc);
556            }
557            pushInStack(doc);
558
559            if (createConf != null) {
560                String chain = createConf.getAutomationChain();
561                if (chain != null && !"".equals(chain.trim())) {
562                    try (OperationContext ctx = new OperationContext(session)) {
563                        ctx.putAll(mvelCtx);
564                        ctx.setInput(docsStack.peek());
565                        Framework.getService(AutomationService.class).run(ctx, chain);
566                    } catch (NuxeoException e) {
567                        throw e;
568                    } catch (Exception e) {
569                        ExceptionUtils.checkInterrupt(e);
570                    }
571                }
572            }
573        }
574        for (Object e : el.elements()) {
575            process((Element) e);
576        }
577    }
578
579    private void pushInStack(DocumentModel doc) {
580        mvelCtx.put("changeableDocument", doc);
581        docsStack.push(doc);
582    }
583
584    private DocumentModel popStack() {
585        DocumentModel doc = docsStack.pop();
586        mvelCtx.put("changeableDocument", doc);
587        return doc;
588    }
589
590}