Skip to main content

The XML descriptor file

Each component contains an XML descriptor file. This XML file provides information defining the component: what are the component attributes, what is the component supposed to do and how it interacts with other components, etc.

For example, the structure of the tFileInputDelimited_java.xml descriptor file looks like this:

<COMPONENT>
    <!-- Basic information about the component (version, author, release date, etc.).
         STARTABLE: whether the component can be the first component of a subJob.
         TYPE: the component type; the component is displayed in the Palette only for
         the corresponding Job or Route type. -->
    <HEADER PLATEFORM="ALL" SERIAL="" VERSION="0.102" STATUS="ALPHA" COMPATIBILITY="ALL"
        AUTHOR="Talend" RELEASE_DATE="20100401A" STARTABLE="true" TYPE="SPARK">
        <SIGNATURE/>
    </HEADER>
    
    <!-- Palette family group(s) in which the component is placed. -->
    <FAMILIES>
        <FAMILY>File/Input</FAMILY>
    </FAMILIES>
    
    <!-- The URL element is left empty in this descriptor. -->
    <DOCUMENTATION>
        <URL/>
    </DOCUMENTATION>
    
    <!-- Connection/link types the component uses to connect to other components in the Job.
         CTYPE is the connector type; MAX_INPUT / MAX_OUTPUT cap the number of input and
         output connectors of that type. -->
    <CONNECTORS>
        <!-- Main flow: no input allowed (MAX_INPUT="0"), one output (MAX_OUTPUT="1"). -->
        <CONNECTOR CTYPE="FLOW" MAX_INPUT="0" MAX_OUTPUT="1"/>
        <!-- Second FLOW connector, named REJECT, with BASE_SCHEMA="FLOW". -->
        <CONNECTOR NAME="REJECT" CTYPE="FLOW" MAX_INPUT="0" MAX_OUTPUT="1" LINE_STYLE="2"
            COLOR="FF0000" BASE_SCHEMA="FLOW"/>
        <CONNECTOR CTYPE="ITERATE" MAX_OUTPUT="1" MAX_INPUT="1"/>
        <CONNECTOR CTYPE="SUBJOB_OK" MAX_INPUT="1"/>
        <CONNECTOR CTYPE="SUBJOB_ERROR" MAX_INPUT="1"/>
        <CONNECTOR CTYPE="COMPONENT_OK"/>
        <CONNECTOR CTYPE="COMPONENT_ERROR"/>
        <CONNECTOR CTYPE="RUN_IF"/>
    </CONNECTORS>
    
    <!-- Component properties. For each PARAMETER, NAME is the property name and FIELD is
         the field type, which determines the type of value that can be set. -->
    <PARAMETERS>
        <PARAMETER NAME="PROPERTY" FIELD="PROPERTY_TYPE" SHOW="true" NUM_ROW="10"
            REPOSITORY_VALUE="DELIMITED"/>
        <PARAMETER NAME="FILENAMETEXT" FIELD="LABEL" COLOR="0;0;0" NUM_ROW="15">
            <DEFAULT>"When the input source is a stream or a zip file,footer and random shouldn't be
                bigger than 0."</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="FILENAME" FIELD="FILE" NUM_ROW="20" REQUIRED="true"
            REPOSITORY_VALUE="FILE_PATH">
            <DEFAULT>"__COMP_DEFAULT_FILE_DIR__/in.csv"</DEFAULT>
        </PARAMETER>
        <!-- NOTE(review): ROWSEPARATOR (SHOW_IF CSV_OPTION=='false') and CSVROWSEPARATOR
             (SHOW_IF CSV_OPTION=='true') share NUM_ROW="30" and
             REPOSITORY_VALUE="ROW_SEPARATOR"; presumably only one of them is visible at a
             time. Confirm against the SHOW_IF evaluator. -->
        <PARAMETER NAME="ROWSEPARATOR" FIELD="TEXT" NUM_ROW="30" REPOSITORY_VALUE="ROW_SEPARATOR"
            SHOW_IF="CSV_OPTION=='false'">
            <DEFAULT>"\n"</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="CSVROWSEPARATOR" FIELD="OPENED_LIST" NUM_ROW="30"
            REPOSITORY_VALUE="ROW_SEPARATOR" MAX_LENGTH="2" SHOW_IF="CSV_OPTION=='true'">
            <ITEMS DEFAULT="LF">
                <ITEM NAME="LF" VALUE="&quot;\n&quot;"/>
                <ITEM NAME="CR" VALUE="&quot;\r&quot;"/>
                <ITEM NAME="CRLF" VALUE="&quot;\r\n&quot;"/>
            </ITEMS>
        </PARAMETER>
        <PARAMETER NAME="FIELDSEPARATOR" FIELD="TEXT" NUM_ROW="30" REQUIRED="true"
            REPOSITORY_VALUE="FIELD_SEPARATOR">
            <DEFAULT>";"</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="CSV_OPTION" FIELD="CHECK" REQUIRED="true" REPOSITORY_VALUE="CSV_OPTION"
            NUM_ROW="35">
            <DEFAULT>false</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="ESCAPE_CHAR" FIELD="TEXT" NUM_ROW="35" REQUIRED="true"
            REPOSITORY_VALUE="ESCAPE_CHAR" SHOW_IF="CSV_OPTION == 'true'">
            <DEFAULT>"""</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="TEXT_ENCLOSURE" FIELD="TEXT" NUM_ROW="35" REQUIRED="true"
            REPOSITORY_VALUE="TEXT_ENCLOSURE" SHOW_IF="CSV_OPTION == 'true'">
            <DEFAULT>"""</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="HEADER" FIELD="TEXT" NUM_ROW="40" REPOSITORY_VALUE="HEADER">
            <DEFAULT>0</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="FOOTER" FIELD="TEXT" NUM_ROW="40" REPOSITORY_VALUE="FOOTER"
            SHOW_IF="UNCOMPRESS=='false'">
            <DEFAULT>0</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="LIMIT" FIELD="TEXT" NUM_ROW="40" REPOSITORY_VALUE="LIMIT">
            <DEFAULT/>
        </PARAMETER>
        <PARAMETER NAME="REMOVE_EMPTY_ROW" FIELD="CHECK" REQUIRED="true" NUM_ROW="46"
            REPOSITORY_VALUE="REMOVE_EMPTY_ROW">
            <DEFAULT>true</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="UNCOMPRESS" FIELD="CHECK" NUM_ROW="46">
            <DEFAULT>false</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="DIE_ON_ERROR" FIELD="CHECK" NUM_ROW="46">
            <DEFAULT>false</DEFAULT>
        </PARAMETER>
        <!-- NOTE(review): the two schema parameters use NUM_ROW="44" although they are
             declared after the NUM_ROW="46" entries; declaration order and NUM_ROW order
             differ here. -->
        <PARAMETER NAME="SCHEMA" FIELD="SCHEMA_TYPE" REQUIRED="true" NUM_ROW="44">
            <DEFAULT/>
        </PARAMETER>
        <!-- CONTEXT="REJECT" matches the NAME of the REJECT connector declared in CONNECTORS.
             The read-only table declares two id_String columns: errorCode and errorMessage. -->
        <PARAMETER NAME="SCHEMA_REJECT" FIELD="SCHEMA_TYPE" REQUIRED="true" NUM_ROW="44"
            CONTEXT="REJECT" SHOW="true">
            <TABLE READONLY="true">
                <COLUMN NAME="errorCode" TYPE="id_String" LENGTH="255" READONLY="false"
                    CUSTOM="true"/>
                <COLUMN NAME="errorMessage" TYPE="id_String" LENGTH="255" READONLY="false"
                    CUSTOM="true"/>
            </TABLE>
        </PARAMETER>
    </PARAMETERS>

    <!-- Advanced properties of the component. Same PARAMETER syntax as in PARAMETERS. -->
    <ADVANCED_PARAMETERS>
        <!-- Declared with SHOW="false". -->
        <PARAMETER FIELD="DIRECTORY" NAME="TEMP_DIR" NUM_ROW="1" READONLY="false" REQUIRED="true"
            SHOW="false">
            <DEFAULT>"__COMP_DEFAULT_FILE_DIR__"</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="ADVANCED_SEPARATOR" FIELD="CHECK" REQUIRED="true" NUM_ROW="41">
            <DEFAULT>false</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="THOUSANDS_SEPARATOR" FIELD="TEXT" REQUIRED="true" NUM_ROW="41"
            SHOW_IF="(ADVANCED_SEPARATOR == 'true')">
            <DEFAULT>","</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="DECIMAL_SEPARATOR" FIELD="TEXT" REQUIRED="true" NUM_ROW="41"
            SHOW_IF="(ADVANCED_SEPARATOR == 'true')">
            <DEFAULT>"."</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="RANDOM" FIELD="CHECK" REQUIRED="true" NUM_ROW="45"
            SHOW_IF="(CSV_OPTION == 'false') AND (UNCOMPRESS=='false')">
            <DEFAULT>false</DEFAULT>
        </PARAMETER>
        <!-- NOTE(review): this SHOW_IF mixes lowercase "and" with uppercase "AND", whereas
             RANDOM above uses only "AND". Confirm the expression evaluator treats the
             operator case-insensitively. -->
        <PARAMETER NAME="NB_RANDOM" FIELD="TEXT" REQUIRED="true" NUM_ROW="45"
            SHOW_IF="(CSV_OPTION == 'false') and (RANDOM == 'true') AND (UNCOMPRESS=='false')">
            <DEFAULT>10</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="TRIMALL" FIELD="CHECK" NUM_ROW="46">
            <DEFAULT>false</DEFAULT>
        </PARAMETER>
        <!-- Table parameter whose items are BASED_ON_SCHEMA, with one CHECK item named TRIM. -->
        <PARAMETER NAME="TRIMSELECT" FIELD="TABLE" NUM_ROW="47" NB_LINES="5"
            SHOW_IF="TRIMALL=='false'">
            <ITEMS BASED_ON_SCHEMA="true">
                <ITEM NAME="TRIM" FIELD="CHECK"/>
            </ITEMS>
        </PARAMETER>
        <PARAMETER NAME="CHECK_FIELDS_NUM" FIELD="CHECK" NUM_ROW="46">
            <DEFAULT>false</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="CHECK_DATE" FIELD="CHECK" NUM_ROW="46">
            <DEFAULT>false</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="ENCODING" FIELD="ENCODING_TYPE" NUM_ROW="45" REQUIRED="true"
            REPOSITORY_VALUE="ENCODING">
            <DEFAULT>"ISO-8859-15"</DEFAULT>
        </PARAMETER>
        <PARAMETER NAME="SPLITRECORD" FIELD="CHECK" REQUIRED="true" NUM_ROW="50"
            SHOW_IF="CSV_OPTION == 'false'" REPOSITORY_VALUE="SPLITRECORD">
            <DEFAULT>false</DEFAULT>
        </PARAMETER>
        <!-- Declared with SHOW="false" and an empty default. -->
        <PARAMETER NAME="DESTINATION" FIELD="TEXT" NUM_ROW="90" SHOW="false">
            <DEFAULT/>
        </PARAMETER>
    </ADVANCED_PARAMETERS>
    
    <!-- Declares the jar files used by the component. -->
    <CODEGENERATION>
        <IMPORTS>
            <!-- NAME corrected from "Talen File Enhanced" to "Talend File Enhanced". -->
            <IMPORT NAME="Talend File Enhanced" MODULE="talend_file_enhanced_20070724.jar"
                REQUIRED="true"/>
            <IMPORT NAME="Talend_CSV" MODULE="talendcsv.jar" REQUIRED="true"/>
        </IMPORTS>
    </CODEGENERATION>
    <!-- Global variables returned by the component. NAME is the variable name, TYPE its
         data type, and AVAILABILITY (AFTER or FLOW) its usage. -->
    <RETURNS>
        <RETURN NAME="NB_LINE" TYPE="id_Integer" AVAILABILITY="AFTER"/>
    </RETURNS>
</COMPONENT>

The HEADER element defines the basic information about the component, such as the version, the author of the component, etc.

<!-- Basic information about the component: version, author, status, etc. -->
<HEADER
    PLATEFORM="ALL"
    SERIAL=""
    VERSION="0.102"
    STATUS="ALPHA"
    COMPATIBILITY="ALL"
    AUTHOR="Talend"
    RELEASE_DATE="20100401A"
    STARTABLE="true"
    TYPE="SPARK">
    <SIGNATURE/>
</HEADER>

The STARTABLE attribute in the header specifies whether the component can be the first component of a subJob. It is always set to true for input components, such as tMysqlInput and tFileInputDelimited, and to false for output components, such as tMysqlOutput.

The TYPE attribute in the header specifies the type of the component. With this attribute set, the component will be displayed only for the corresponding Job or Route type in the Palette. Available types are:

  • DI: specifies a Data Integration component. This is the default component type if the TYPE attribute is missing.
  • CAMEL: specifies a Mediation Route component.
  • SPARK: specifies a Big Data Spark component.
  • SPARKSTREAMING: specifies a Big Data Spark Streaming component.

If you want to allow your component to have multiple output data flows, for example, both main and reject connections, add a HAS_CONDITIONAL_OUTPUTS="true" attribute in the header. Note that this attribute is not supported for a Mediation Route component.

The FAMILIES element specifies the family group(s) where the component should be put in the Palette. A component can be put in several groups, in System and Orchestration groups for example:

<!-- One FAMILY entry per Palette group in which the component should appear. -->
<FAMILIES>
    <FAMILY>System</FAMILY>
    <FAMILY>Orchestration</FAMILY>
</FAMILIES>

The CONNECTORS element defines the type of connection or link the component uses to connect to other component(s) in the Job. This defines how this component will interact with other components.

<!-- Connection types the component accepts; CTYPE is the connector type. -->
<CONNECTORS>
    <CONNECTOR CTYPE="FLOW" MAX_INPUT="0" MAX_OUTPUT="1"/>
    <CONNECTOR
        NAME="REJECT"
        CTYPE="FLOW"
        MAX_INPUT="0"
        MAX_OUTPUT="1"
        LINE_STYLE="2"
        COLOR="FF0000"
        BASE_SCHEMA="FLOW"/>
    <CONNECTOR CTYPE="ITERATE" MAX_OUTPUT="1" MAX_INPUT="1"/>
    <CONNECTOR CTYPE="SUBJOB_OK" MAX_INPUT="1"/>
    <CONNECTOR CTYPE="SUBJOB_ERROR" MAX_INPUT="1"/>
    <CONNECTOR CTYPE="COMPONENT_OK"/>
    <CONNECTOR CTYPE="COMPONENT_ERROR"/>
    <CONNECTOR CTYPE="RUN_IF"/>
</CONNECTORS>

The types of trigger and flow connectors that the component accepts are shown in the contextual menu of the component.

The CONNECTOR CTYPE attribute defines the connector type.

The FLOW type means it can handle a Main or a Reject data flow. The MAX_INPUT attribute defines the maximum number of allowed input connectors linked to this component, and the MAX_OUTPUT attribute defines the maximum number of allowed output connectors that the component can connect to. For example:

<CONNECTOR CTYPE="FLOW" MAX_INPUT="0" MAX_OUTPUT="1"/>

In this example, the component does not accept any incoming Main data flow and can send at most one Main data flow to another component.

The PARAMETERS element defines the component properties. The PARAMETER NAME attribute defines the property name, the PARAMETER FIELD attribute defines the field type and thus determines the type of value that can be set.

<!-- Excerpt: first and last parameters of the component's Basic settings. -->
<PARAMETERS>
    <PARAMETER NAME="PROPERTY" FIELD="PROPERTY_TYPE" SHOW="true"
        NUM_ROW="10" REPOSITORY_VALUE="DELIMITED"/>

    <!-- ... other parameters omitted ... -->

    <PARAMETER NAME="SCHEMA_REJECT" FIELD="SCHEMA_TYPE"
        REQUIRED="true" NUM_ROW="44" CONTEXT="REJECT" SHOW="true">
        <TABLE READONLY="true">
            <COLUMN NAME="errorCode" TYPE="id_String" LENGTH="255"
                READONLY="false" CUSTOM="true"/>
            <COLUMN NAME="errorMessage" TYPE="id_String"
                LENGTH="255" READONLY="false" CUSTOM="true"/>
        </TABLE>
    </PARAMETER>
</PARAMETERS>

These parameters are displayed in the Basic settings view of the component.

The component attributes depend on what the component does and on the information the user must provide for the component to work as expected. As this information may vary from one user to another, it can be stored in a variable as a component attribute.

The ADVANCED_PARAMETERS element defines the component advanced properties.

<!-- Excerpt: first and last parameters of the component's advanced properties. -->
<ADVANCED_PARAMETERS>
    <PARAMETER
        FIELD="DIRECTORY"
        NAME="TEMP_DIR"
        NUM_ROW="1"
        READONLY="false"
        REQUIRED="true"
        SHOW="false">
        <DEFAULT>"__COMP_DEFAULT_FILE_DIR__"</DEFAULT>
    </PARAMETER>

    <!-- ... other parameters omitted ... -->

    <PARAMETER
        NAME="DESTINATION"
        FIELD="TEXT"
        NUM_ROW="90"
        SHOW="false">
        <DEFAULT/>
    </PARAMETER>
</ADVANCED_PARAMETERS>

These advanced parameters are displayed in the Advanced settings view of the component.

The CODEGENERATION element declares the jar files which will be used in the component. These jar files should be placed in the component folder.

<!-- Jar files used by the component; one IMPORT per jar. -->
<CODEGENERATION>
    <IMPORTS>
        <IMPORT
            NAME="Talend File Enhanced"
            MODULE="talend_file_enhanced_20070724.jar"
            REQUIRED="true"/>
        <IMPORT
            NAME="Talend_CSV"
            MODULE="talendcsv.jar"
            REQUIRED="true"/>
    </IMPORTS>
</CODEGENERATION>

The RETURNS element defines the global variables returned by the component. The RETURN NAME attribute defines the variable name, the TYPE attribute defines the data type of the variable, and the AVAILABILITY attribute defines the usage of the variable. The AVAILABILITY attribute can have AFTER or FLOW as a value.

<!-- Global variables returned by the component. -->
<RETURNS>
    <RETURN
        NAME="NB_LINE"
        TYPE="id_Integer"
        AVAILABILITY="AFTER"/>
</RETURNS>

The global variables can be used in other components in the same subJob or in other subJobs depending on the AVAILABILITY attribute definition.

The most common global variable is NB_LINE, which is usually used to count the total number of processed lines.

Did this page help you?

If you find any issues with this page or its content – a typo, a missing step, or a technical error – let us know how we can improve!