001/*
002 * (C) Copyright 2002-2015 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 */
019
020package org.nuxeo.ecm.platform.importer.xml.parser;
021
022import org.apache.commons.io.FileUtils;
023import org.apache.commons.logging.Log;
024import org.apache.commons.logging.LogFactory;
025import org.dom4j.Attribute;
026import org.dom4j.Document;
027import org.dom4j.DocumentException;
028import org.dom4j.Element;
029import org.dom4j.InvalidXPathException;
030import org.dom4j.Node;
031import org.dom4j.Text;
032import org.dom4j.io.SAXReader;
033import org.dom4j.tree.DefaultText;
034import org.mvel2.MVEL;
035import org.nuxeo.common.Environment;
036import org.nuxeo.common.utils.ExceptionUtils;
037import org.nuxeo.common.utils.ZipUtils;
038import org.nuxeo.ecm.automation.AutomationService;
039import org.nuxeo.ecm.automation.OperationContext;
040import org.nuxeo.ecm.core.api.Blob;
041import org.nuxeo.ecm.core.api.Blobs;
042import org.nuxeo.ecm.core.api.CoreSession;
043import org.nuxeo.ecm.core.api.DocumentModel;
044import org.nuxeo.ecm.core.api.NuxeoException;
045import org.nuxeo.ecm.core.api.model.Property;
046import org.nuxeo.ecm.core.api.model.impl.primitives.BlobProperty;
047import org.nuxeo.ecm.core.schema.types.ListType;
048import org.nuxeo.runtime.api.Framework;
049
050import java.io.File;
051import java.io.IOException;
052import java.io.InputStream;
053import java.io.Serializable;
054import java.util.ArrayList;
055import java.util.HashMap;
056import java.util.List;
057import java.util.Map;
058import java.util.Stack;
059
060/**
061 * Main implementation class for delivering the Import logic
062 *
063 * @author <a href="mailto:tdelprat@nuxeo.com">Tiry</a>
064 */
065public class XMLImporterServiceImpl {
066
067    protected static final String FILE_PROPERTY = "file";
068
069    protected static final String CONTENT_PROPERTY = "content";
070
071    protected static final String MIME_TYPE_PROPERTY = "mimetype";
072
073    protected static final String FILE_NAME_PROPERTY = "filename";
074
075    private static final String MSG_NO_ELEMENT_FOUND = "**CREATION**\n"
076            + "No element \"%s\" found in %s, use the DOC_TYPE-INDEX value";
077
078    private static final String MSG_CREATION = "**CREATION**\n"
079            + "Try to create document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n";
080
081    private static final String MSG_UPDATE = "**DOCUMENT UPDATE**\n"
082            + "Try to update document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n";
083
084    private static final String MSG_UPDATE_PROPERTY_TRACE = "**PROPERTY UPDATE**\n"
085            + "Value found for %s in %s is \"%s\". With the following conf: %s";
086
087    private static final String MSG_UPDATE_PROPERTY = "**PROPERTY UPDATE**\n"
088            + "Try to set value into %s property based on %s element on document \"%s\" (%s). Conf activated: %s";
089
090    public static final Log log = LogFactory.getLog(XMLImporterServiceImpl.class);
091
092    public static final String XML_IMPORTER_INITIALIZATION = "org.nuxeo.xml.importer.initialization";
093
094    protected CoreSession session;
095
096    protected DocumentModel rootDoc;
097
098    protected Stack<DocumentModel> docsStack;
099
100    protected Map<String, List<String>> deletedAttributes = new HashMap<>();
101
102    protected Map<String, Object> mvelCtx = new HashMap<>();
103
104    protected Map<Element, DocumentModel> elToDoc = new HashMap<>();
105
106    protected ParserConfigRegistry registry;
107
108    protected Boolean deferSave = false;
109
110    public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry) {
111        this(rootDoc, registry, null, false);
112    }
113
114    public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry,
115            Map<String, Object> mvelContext, boolean deferSave) {
116        if (mvelContext != null) {
117            mvelCtx.putAll(mvelContext);
118        }
119
120        session = rootDoc.getCoreSession();
121        this.rootDoc = rootDoc;
122        this.deferSave = deferSave;
123
124        docsStack = new Stack<>();
125        pushInStack(rootDoc);
126        mvelCtx.put("root", rootDoc);
127        mvelCtx.put("docs", docsStack);
128        mvelCtx.put("session", session);
129
130        this.registry = registry;
131    }
132
133    protected ParserConfigRegistry getRegistry() {
134        return registry;
135    }
136
137    protected DocConfigDescriptor getDocCreationConfig(Element el) {
138        for (DocConfigDescriptor conf : getRegistry().getDocCreationConfigs()) {
139            // direct tagName match
140            if (conf.getTagName().equals(el.getName())) {
141                return conf;
142            } else {
143                // try xpath match
144                try {
145                    if (el.matches(conf.getTagName())) {
146                        return conf;
147                    }
148                } catch (InvalidXPathException e) {
149                    // NOP
150                }
151            }
152        }
153        return null;
154    }
155
156    protected List<AttributeConfigDescriptor> getAttributConfigs(Element el) {
157        List<AttributeConfigDescriptor> result = new ArrayList<>();
158        for (AttributeConfigDescriptor conf : getRegistry().getAttributConfigs()) {
159            if (conf.getTagName().equals(el.getName())) {
160                result.add(conf);
161            } else {
162                // try xpath match
163                try {
164                    if (el.matches(conf.getTagName())) {
165                        result.add(conf);
166                    }
167                } catch (InvalidXPathException e) {
168                    // NOP
169                }
170            }
171        }
172        return result;
173    }
174
175    protected File workingDirectory;
176
177    private AutomationService automationService;
178
179    public List<DocumentModel> parse(InputStream is) throws IOException {
180        mvelCtx.put("source", is);
181        try {
182            Document doc;
183            doc = new SAXReader().read(is);
184            workingDirectory = null;
185            return parse(doc);
186        } catch (DocumentException e) {
187            throw new IOException(e);
188        }
189    }
190
191    public List<DocumentModel> parse(File file) throws IOException {
192        mvelCtx.put("source", file);
193
194        Document doc = null;
195        File directory = null;
196        try {
197            doc = new SAXReader().read(file);
198            workingDirectory = file.getParentFile();
199        } catch (DocumentException e) {
200            File tmp = Environment.getDefault().getTemp();
201            directory = new File(tmp, file.getName() + System.currentTimeMillis());
202            directory.mkdir();
203            ZipUtils.unzip(file, directory);
204            for (File child : directory.listFiles()) {
205                if (child.getName().endsWith(".xml")) {
206                    return parse(child);
207                }
208            }
209            throw new NuxeoException("Can not find XML file inside the zip archive", e);
210        } finally {
211            FileUtils.deleteQuietly(directory);
212        }
213        return parse(doc);
214    }
215
216    public List<DocumentModel> parse(Document doc) {
217        Element root = doc.getRootElement();
218        elToDoc = new HashMap<>();
219        mvelCtx.put("xml", doc);
220        mvelCtx.put("map", elToDoc);
221        process(root);
222
223        // defer saveDocument to end of operation
224        if (deferSave) {
225            ArrayList<DocumentModel> a = new ArrayList<>();
226            DocumentModel d = null;
227            while (docsStack.size() > 0) {
228                d = popStack();
229                d.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE);
230                d = session.saveDocument(d);
231                a.add(d);
232            }
233            return a;
234        } else {
235            return new ArrayList<>(docsStack);
236        }
237    }
238
239    protected Object resolveComplex(Element el, AttributeConfigDescriptor conf) {
240        Map<String, Object> propValue = new HashMap<>();
241        for (String name : conf.getMapping().keySet()) {
242            propValue.put(name, resolveAndEvaluateXmlNode(el, conf.getMapping().get(name)));
243        }
244
245        return propValue;
246    }
247
248    protected Blob resolveBlob(Element el, AttributeConfigDescriptor conf, String propertyName) {
249        @SuppressWarnings("unchecked")
250        Map<String, Object> propValues = (Map<String, Object>) resolveComplex(el, conf);
251
252        if (propValues.containsKey(propertyName)) {
253            try {
254                Blob blob = null;
255                String content = (String) propValues.get(propertyName);
256                if (content != null && workingDirectory != null) {
257                    File file = new File(workingDirectory, content.trim());
258                    if (file.exists()) {
259                        blob = Blobs.createBlob(file);
260                    }
261                }
262                if (blob == null && content != null) {
263                    blob = Blobs.createBlob(content);
264                }
265
266                if (blob != null) {
267                    if (propValues.containsKey(MIME_TYPE_PROPERTY)) {
268                        blob.setMimeType((String) propValues.get(MIME_TYPE_PROPERTY));
269                    }
270                    if (propValues.containsKey(FILE_NAME_PROPERTY)) {
271                        blob.setFilename((String) propValues.get(FILE_NAME_PROPERTY));
272                    }
273                }
274
275                return blob;
276            } catch (IOException e) {
277                throw new RuntimeException(e);
278            }
279        }
280        return null;
281    }
282
283    @SuppressWarnings("unchecked")
284    protected void processDocAttributes(DocumentModel doc, Element el, AttributeConfigDescriptor conf) {
285        String targetDocProperty = conf.getTargetDocProperty();
286
287        if (log.isDebugEnabled()) {
288            log.debug(String.format(MSG_UPDATE_PROPERTY, targetDocProperty, el.getUniquePath(), doc.getPathAsString(),
289                    doc.getType(), conf.toString()));
290        }
291        Property property = doc.getProperty(targetDocProperty);
292
293        if (property.isScalar()) {
294            Object value = resolveAndEvaluateXmlNode(el, conf.getSingleXpath());
295            if (log.isTraceEnabled()) {
296                log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
297                        conf.toString()));
298            }
299            property.setValue(value);
300
301        } else if (property.isComplex()) {
302
303            if (property instanceof BlobProperty) {
304                Object value = resolveBlob(el, conf, CONTENT_PROPERTY);
305                if (log.isTraceEnabled()) {
306                    log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
307                            conf.toString()));
308                }
309                property.setValue(value);
310            } else {
311                Object value = resolveComplex(el, conf);
312                if (log.isTraceEnabled()) {
313                    log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
314                            conf.toString()));
315                }
316                property.setValue(value);
317            }
318
319        } else if (property.isList()) {
320
321            ListType lType = (ListType) property.getType();
322
323            Serializable value;
324
325            if (lType.getFieldType().isSimpleType()) {
326                value = (Serializable) resolveAndEvaluateXmlNode(el, conf.getSingleXpath());
327                if (value != null) {
328                    Object values = property.getValue();
329                    if (values == null) {
330                        property.setValue(new Object[] { value });
331                    } else if (values instanceof Object[]) {
332                        int len = ((Object[]) values).length;
333                        Object[] newValues = new Object[len + 1];
334                        System.arraycopy(values, 0, newValues, 0, len);
335                        newValues[len] = value;
336                        property.setValue(newValues);
337                    } else {
338                        log.error("Simple multi value property " + targetDocProperty
339                                + " is not an Array");
340                    }
341                }
342            } else {
343                Map<String, Object> props = (Map<String, Object>) resolveComplex(el, conf);
344                if (props.containsKey(FILE_PROPERTY)) {
345                    Blob blob = resolveBlob(el, conf, FILE_PROPERTY);
346                    props.put(FILE_PROPERTY, blob);
347                }
348                property.addValue(props);
349                value = (Serializable) props;
350            }
351
352            if (log.isTraceEnabled()) {
353                log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
354                        conf.toString()));
355            }
356        }
357    }
358
359    protected Map<String, Object> getMVELContext(Element el) {
360        mvelCtx.put("currentDocument", docsStack.peek());
361        mvelCtx.put("currentElement", el);
362        mvelCtx.put("Fn", new MVELImporterFunction(session, docsStack, elToDoc, el));
363        return mvelCtx;
364    }
365
366    protected Object resolve(Element el, String xpr) {
367        if (xpr == null) {
368            return null;
369        }
370
371        if (xpr.startsWith("#{") && xpr.endsWith("}")) { // MVEL
372            xpr = xpr.substring(2, xpr.length() - 1);
373            return resolveMVEL(el, xpr);
374        } else if (xpr.contains("{{")) { // String containing XPaths
375            StringBuffer sb = new StringBuffer();
376            int idx = xpr.indexOf("{{");
377            while (idx >= 0) {
378                int idx2 = xpr.indexOf("}}", idx);
379                if (idx2 > 0) {
380                    sb.append(xpr.substring(0, idx));
381                    String xpath = xpr.substring(idx + 2, idx2);
382                    sb.append(resolveAndEvaluateXmlNode(el, xpath));
383                    xpr = xpr.substring(idx2);
384                } else {
385                    sb.append(xpr);
386                    xpr = "";
387                }
388                idx = xpr.indexOf("{{");
389            }
390            return sb.toString();
391        } else {
392            return resolveXP(el, xpr); // default to pure XPATH
393        }
394    }
395
396    protected Object resolveMVEL(Element el, String xpr) {
397        Map<String, Object> ctx = new HashMap<>(getMVELContext(el));
398        Serializable compiled = MVEL.compileExpression(xpr);
399        return MVEL.executeExpression(compiled, ctx);
400    }
401
402    protected Object resolveXP(Element el, String xpr) {
403        List<Object> nodes = el.selectNodes(xpr);
404        if (nodes.size() == 1) {
405            return nodes.get(0);
406        } else if (nodes.size() > 1) {
407            // Workaround for NXP-11834
408            if (xpr.endsWith("text()")) {
409                String value = "";
410                for (Object node : nodes) {
411                    if (!(node instanceof DefaultText)) {
412                        String msg = "Text selector must return a string (expr:\"%s\") element %s";
413                        log.error(String.format(msg, xpr, el.getStringValue()));
414                        return value;
415                    }
416                    value += ((DefaultText) node).getText();
417                }
418                return new DefaultText(value);
419            }
420            return nodes;
421        }
422        return null;
423    }
424
425    protected String resolvePath(Element el, String xpr) {
426        Object ob = resolve(el, xpr);
427        if (ob == null) {
428            for (int i = 0; i < docsStack.size(); i++) {
429                if (docsStack.get(i).isFolder()) {
430                    return docsStack.get(i).getPathAsString();
431                }
432            }
433        } else {
434            if (ob instanceof DocumentModel) {
435                return ((DocumentModel) ob).getPathAsString();
436            } else if (ob instanceof Node) {
437                if (ob instanceof Element) {
438                    Element targetElement = (Element) ob;
439                    DocumentModel target = elToDoc.get(targetElement);
440                    if (target != null) {
441                        return target.getPathAsString();
442                    } else {
443                        return targetElement.getText();
444                    }
445                } else if (ob instanceof Attribute) {
446                    return ((Attribute) ob).getValue();
447                } else if (ob instanceof Text) {
448                    return ((Text) ob).getText();
449                } else if (ob.getClass().isAssignableFrom(Attribute.class)) {
450                    return ((Attribute) ob).getValue();
451                }
452            } else {
453                return ob.toString();
454            }
455        }
456        return rootDoc.getPathAsString();
457    }
458
459    protected String resolveName(Element el, String xpr) {
460        Object ob = resolveAndEvaluateXmlNode(el, xpr);
461        if (ob == null) {
462            return null;
463        }
464        return ob.toString();
465    }
466
467    protected Object resolveAndEvaluateXmlNode(Element el, String xpr) {
468        Object ob = resolve(el, xpr);
469        if (ob == null) {
470            return null;
471        }
472        if (ob instanceof Node) {
473            return ((Node) ob).getText();
474        } else {
475            return ob;
476        }
477    }
478
479    protected void createNewDocument(Element el, DocConfigDescriptor conf) {
480        DocumentModel doc = session.createDocumentModel(conf.getDocType());
481
482        String path = resolvePath(el, conf.getParent());
483        Object nameOb = resolveName(el, conf.getName());
484        String name = null;
485        if (nameOb == null) {
486            if (log.isDebugEnabled()) {
487                log.debug(String.format(MSG_NO_ELEMENT_FOUND, conf.getName(), el.getUniquePath()));
488            }
489            int idx = 1;
490            for (int i = 0; i < docsStack.size(); i++) {
491                if (docsStack.get(i).getType().equals(conf.getDocType())) {
492                    idx++;
493                }
494            }
495            name = conf.getDocType() + "-" + idx;
496        } else {
497            name = nameOb.toString();
498        }
499        doc.setPathInfo(path, name);
500
501        if (log.isDebugEnabled()) {
502            if (conf.getUpdate()) {
503                log.debug(String.format(MSG_UPDATE, path, name, el.getUniquePath(), conf.toString()));
504            } else {
505                log.debug(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString()));
506            }
507        }
508
509        try {
510            if (conf.getUpdate() && session.exists(doc.getRef())) {
511                DocumentModel existingDoc = session.getDocument(doc.getRef());
512
513                // get attributes, if attribute needs to be overwritten, empty in the document
514                for (Object e : el.elements()) {
515                    List<AttributeConfigDescriptor> configs = getAttributConfigs((Element) e);
516                    if (configs != null) {
517                        if (!deletedAttributes.containsKey(existingDoc.getId())) {
518                            deletedAttributes.put(existingDoc.getId(), new ArrayList<String>());
519                        }
520                        for (AttributeConfigDescriptor config : configs) {
521                            String targetDocProperty = config.getTargetDocProperty();
522                            // check deletedAttributes for attribute which should be overwritten
523                            // if it is there, don't empty it a second time
524                            if (config.overwrite
525                                    && !deletedAttributes.get(existingDoc.getId()).contains(targetDocProperty)) {
526                                deletedAttributes.get(existingDoc.getId()).add(targetDocProperty);
527                                existingDoc.setPropertyValue(targetDocProperty, new ArrayList<>());
528                            }
529                        }
530                    }
531                }
532                doc = existingDoc;
533            } else {
534                doc = session.createDocument(doc);
535            }
536        } catch (NuxeoException e) {
537            e.addInfo(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString()));
538            throw e;
539        }
540        pushInStack(doc);
541        elToDoc.put(el, doc);
542    }
543
544    protected void process(Element el) {
545        DocConfigDescriptor createConf = getDocCreationConfig(el);
546        if (createConf != null) {
547            createNewDocument(el, createConf);
548        }
549        List<AttributeConfigDescriptor> configs = getAttributConfigs(el);
550        if (configs != null) {
551            for (AttributeConfigDescriptor config : configs) {
552                processDocAttributes(docsStack.peek(), el, config);
553            }
554
555            DocumentModel doc = popStack();
556            doc.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE);
557            if (!deferSave) {
558                doc = session.saveDocument(doc);
559            }
560            pushInStack(doc);
561
562            if (createConf != null) {
563                String chain = createConf.getAutomationChain();
564                if (chain != null && !"".equals(chain.trim())) {
565                    try (OperationContext ctx = new OperationContext(session)) {
566                        ctx.putAll(mvelCtx);
567                        ctx.setInput(docsStack.peek());
568                        getAutomationService().run(ctx, chain);
569                    } catch (NuxeoException e) {
570                        throw e;
571                    } catch (Exception e) {
572                        ExceptionUtils.checkInterrupt(e);
573                    }
574                }
575            }
576        }
577        for (Object e : el.elements()) {
578            process((Element) e);
579        }
580    }
581
582    private AutomationService getAutomationService() {
583        if (automationService == null) {
584            automationService = Framework.getService(AutomationService.class);
585        }
586        return automationService;
587
588    }
589
590    private void pushInStack(DocumentModel doc) {
591        mvelCtx.put("changeableDocument", doc);
592        docsStack.push(doc);
593    }
594
595    private DocumentModel popStack() {
596        DocumentModel doc = docsStack.pop();
597        mvelCtx.put("changeableDocument", doc);
598        return doc;
599    }
600
601}